#include <string.h>
#include "poly1305.h"
+#include "rsvr.h"
/*----- Global variables --------------------------------------------------*/
#endif
-/*----- Low-level implementation for 32/64-bit targets --------------------*/
+/*----- Low-level implementation for 16/32-bit targets --------------------*/
#ifndef POLY1305_IMPL
# define POLY1305_IMPL 11
uint32 c;
/* Do sequential carry propagation (16-bit CPUs are less likely to benefit
- * from instruction-level parallelism). Start at u_10; truncate it to 11
- * bits, and add the carry onto u_11. Truncate u_11 to 10 bits, and add
- * five times the carry onto u_0. And so on.
+ * from instruction-level parallelism). Start at u_9; truncate it to 11
+ * bits, and add the carry onto u_10. Truncate u10 to 11 bits, and add the
+ * carry onto u_11. Truncate u_11 to 10 bits, and add five times the carry
+ * onto u_0. And so on.
*
* The carry is larger than the pieces we're leaving behind. Let c_i be
* the high portion of u_i, to be carried onto u_{i+1}. I claim that c_i <
* carry is bounded above by 636*2^22 + 12785*2^10 < 2557*2^20. Hence, the
* carry out is at most 2557*2^10, as claimed.
*
- * Once we reach u_10 for the second time, we start with u_10 < 2^11. The
- * carry into u_10 is at most 2557*2^10 < 1279*2^11 as calculated above; so
- * the carry out into u_11 is at most 1280. Since u_11 < 2^10 prior to
- * this carry in, all of the u_i are now bounded above by 2^11. The final
- * reduction therefore only needs a conditional subtraction.
+ * Once we reach u_9 for the second time, we start with u_9 < 2^11. The
+ * carry into u_9 is at most 2557*2^10 < 1279*2^11 as calculated above; so
+ * the carry out into u_10 is at most 1280. Since u_10 < 2^11 prior to
+ * this carry in, we now have u_10 < 2^11 + 1280 < 2^12; so the carry out
+ * into u_11 is at most 1. The final reduction therefore only needs a
+ * conditional subtraction.
*/
- { c = u[10] >> 11; u[10] &= M11; }
+ { c = u[9] >> 11; u[9] &= M11; }
+ { u[10] += c; c = u[10] >> 11; u[10] &= M11; }
{ u[11] += c; c = u[11] >> 10; u[11] &= M10; }
{ u[0] += 5*c; c = u[0] >> 11; u[0] &= M11; }
for (i = 1; i < 5; i++) { u[i] += c; c = u[i] >> 11; u[i] &= M11; }
ctx->count++;
}
+static const rsvr_policy pol = { 0, 16, 16 };
+
void poly1305_hash(poly1305_ctx *ctx, const void *p, size_t sz)
{
- const octet *pp = p;
- size_t n;
-
- if (ctx->nbuf) {
- if (sz < 16 - ctx->nbuf) {
- memcpy(ctx->buf + ctx->nbuf, p, sz);
- ctx->nbuf += sz;
- return;
- }
- n = 16 - ctx->nbuf;
- memcpy(ctx->buf + ctx->nbuf, pp, n);
- update_full(ctx, ctx->buf);
- pp += n; sz -= n;
- }
- while (sz >= 16) {
- update_full(ctx, pp);
- pp += 16; sz -= 16;
- }
- if (sz) memcpy(ctx->buf, pp, sz);
- ctx->nbuf = sz;
+ rsvr_state st;
+ const octet *q = p;
+
+ rsvr_setup(&st, &pol, &ctx->buf, &ctx->nbuf, p, sz);
+ RSVR_DO(&st) while ((q = RSVR_NEXT(&st, 16)) != 0) update_full(ctx, q);
}
/* --- @poly1305_flush@ --- *
* far is a whole number of blocks. Flushing is performed
* automatically by @poly1305_done@, but it may be necessary to
* force it by hand when using @poly1305_concat@.
+ * (Alternatively, you might use @poly1305_flushzero@ instead.)
*
* Flushing a partial block has an observable effect on the
* computation: the resulting state is (with high probability)
#endif
mul_r(ctx, ctx->u.P.h, t);
- ctx->count++;
+ ctx->nbuf = 0; ctx->count++;
+}
+
+/* --- @poly1305_flushzero@ --- *
+ *
+ * Arguments: @poly1305_ctx *ctx@ = MAC context to flush
+ *
+ * Returns: ---
+ *
+ * Use: Forces any buffered message data in the context to be
+ * processed, by hashing between zero and fifteen additional
+ * zero bytes. Like @poly1305_flush@, this has no effect if the
+ * the message processed so far is a whole number of blocks.
+ * Unlike @poly1305_flush@, the behaviour if the message is not
+ * a whole number of blocks is equivalent to actually hashing
+ * some extra data.
+ */
+
+void poly1305_flushzero(poly1305_ctx *ctx)
+{
+ if (!ctx->nbuf) return;
+ memset(ctx->buf + ctx->nbuf, 0, 16 - ctx->nbuf);
+ update_full(ctx, ctx->buf);
ctx->nbuf = 0;
}
#ifdef TEST_RIG
+#include <mLib/macros.h>
#include <mLib/testrig.h>
+#include "ct.h"
+#include "rijndael-ecb.h"
+
static int vrf_hash(dstr v[])
{
poly1305_key k;
if (v[3].len != 16) { fprintf(stderr, "bad tag length\n"); exit(2); }
dstr_ensure(&t, 16); t.len = 16;
+ ct_poison(v[0].buf, v[0].len);
poly1305_keyinit(&k, v[0].buf, v[0].len);
for (i = 0; i < v[2].len; i++) {
for (j = i; j < v[2].len; j++) {
poly1305_hash(&ctx, v[2].buf + i, j - i);
poly1305_hash(&ctx, v[2].buf + j, v[2].len - j);
poly1305_done(&ctx, t.buf);
- if (memcmp(t.buf, v[3].buf, 16) != 0) {
+ ct_remedy(t.buf, t.len);
+ if (MEMCMP(t.buf, !=, v[3].buf, 16)) {
fprintf(stderr, "failed...");
fprintf(stderr, "\n\tkey = "); type_hex.dump(&v[0], stderr);
fprintf(stderr, "\n\tmask = "); type_hex.dump(&v[1], stderr);
poly1305_concat(&ctx, &ctx, &cc[2]);
}
poly1305_done(&ctx, t.buf);
- if (memcmp(t.buf, v[5].buf, 16) != 0) {
+ if (MEMCMP(t.buf, !=, v[5].buf, 16)) {
fprintf(stderr, "failed...");
fprintf(stderr, "\n\tkey = "); type_hex.dump(&v[0], stderr);
fprintf(stderr, "\n\tmask = "); type_hex.dump(&v[1], stderr);
return (ok);
}
+#define MSZMAX 1000
+
+static int vrf_mct(dstr v[])
+{
+ unsigned j, msz;
+ unsigned long i, niter;
+ rijndael_ecbctx rij;
+ poly1305_key key;
+ poly1305_ctx mac;
+ dstr d = DSTR_INIT;
+ octet k[16], r[16], n[16], s[16], *t, m[MSZMAX] = { 0 };
+ int ok = 1;
+
+ if (v[0].len != 16) { fprintf(stderr, "AES key len\n"); exit(2); }
+ if (v[1].len != 16) { fprintf(stderr, "poly key len\n"); exit(2); }
+ if (v[2].len != 16) { fprintf(stderr, "nonce len\n"); exit(2); }
+ if (v[4].len != 16) { fprintf(stderr, "result len\n"); exit(2); }
+ memcpy(k, v[0].buf, 16);
+ memcpy(r, v[1].buf, 16);
+ memcpy(n, v[2].buf, 16);
+ niter = *(unsigned long *)v[3].buf;
+ dstr_ensure(&d, 16); d.len = 16; t = (octet *)d.buf;
+
+ rijndael_ecbinit(&rij, k, 16, 0);
+ poly1305_keyinit(&key, r, 16);
+ for (i = 0; i < niter; i++) {
+ msz = 0;
+ for (;;) {
+ rijndael_ecbencrypt(&rij, n, s, 16);
+ poly1305_macinit(&mac, &key, s);
+ poly1305_hash(&mac, m, msz);
+ poly1305_done(&mac, t);
+ if (msz >= MSZMAX) break;
+ n[0] ^= i&0xff;
+ for (j = 0; j < 16; j++) n[j] ^= t[j];
+ if (msz%2) {
+ for (j = 0; j < 16; j++) k[j] ^= t[j];
+ rijndael_ecbinit(&rij, k, 16, 0);
+ }
+ if (msz%3) {
+ for (j = 0; j < 16; j++) r[j] ^= t[j];
+ poly1305_keyinit(&key, r, 16);
+ }
+ m[msz++] ^= t[0];
+ }
+ }
+
+ if (MEMCMP(t, !=, v[4].buf, 16)) {
+ ok = 0;
+ fprintf(stderr, "failed...");
+ fprintf(stderr, "\n\tinitial k = "); type_hex.dump(&v[0], stderr);
+ fprintf(stderr, "\n\tinitial r = "); type_hex.dump(&v[1], stderr);
+ fprintf(stderr, "\n\tinitial n = "); type_hex.dump(&v[2], stderr);
+ fprintf(stderr, "\n\titerations = %lu", niter);
+ fprintf(stderr, "\n\texpected = "); type_hex.dump(&v[4], stderr);
+ fprintf(stderr, "\n\tcalculated = "); type_hex.dump(&d, stderr);
+ fputc('\n', stderr);
+ }
+
+ dstr_destroy(&d);
+ return (ok);
+}
+
static const struct test_chunk tests[] = {
{ "poly1305-hash", vrf_hash,
{ &type_hex, &type_hex, &type_hex, &type_hex } },
{ "poly1305-cat", vrf_cat,
{ &type_hex, &type_hex, &type_hex, &type_hex, &type_hex, &type_hex } },
+ { "poly1305-mct", vrf_mct,
+ { &type_hex, &type_hex, &type_hex, &type_ulong, &type_hex } },
{ 0, 0, { 0 } }
};