From 15dfa998435a40ff3a7b1de4ec85541e8bac08dc Mon Sep 17 00:00:00 2001 Message-Id: <15dfa998435a40ff3a7b1de4ec85541e8bac08dc.1714982923.git.mdw@distorted.org.uk> From: Mark Wooding Date: Sun, 28 May 2017 19:03:08 +0100 Subject: [PATCH] symm/sha{,256,512}.c: Fold message-scheduling in with state update. Organization: Straylight/Edgeware From: Mark Wooding Previously, I implemented these compression functions in two steps: first performing the message expansion, and then applying the state update. Instead, save temporary space by interleaving the two steps. This also results in a small performance improvement. --- symm/sha.c | 205 +++++++++++++++++++++++--------------------------- symm/sha256.c | 170 ++++++++++++++++++----------------------- symm/sha512.c | 94 ++++++++++++----------- 3 files changed, 220 insertions(+), 249 deletions(-) diff --git a/symm/sha.c b/symm/sha.c index e4b50237..980fe802 100644 --- a/symm/sha.c +++ b/symm/sha.c @@ -49,29 +49,12 @@ void sha_compress(sha_ctx *ctx, const void *sbuf) { uint32 a, b, c, d, e; - uint32 buf[80]; + uint32 m[16]; + const octet *p; + int i; - /* --- Fetch the chaining variables --- */ - - a = ctx->a; - b = ctx->b; - c = ctx->c; - d = ctx->d; - e = ctx->e; - - /* --- Fetch and expand the buffer contents --- */ - - { - int i; - const octet *p; - - for (i = 0, p = sbuf; i < 16; i++, p += 4) - buf[i] = LOAD32(p); - for (i = 16; i < 80; i++) { - uint32 x = buf[i - 3] ^ buf[i - 8] ^ buf[i - 14] ^ buf[i - 16]; - buf[i] = ROL32(x, 1); - } - } + a = ctx->a; b = ctx->b; c = ctx->c; d = ctx->d; e = ctx->e; + for (p = sbuf, i = 0; i < 16; i++, p += 4) m[i] = LOAD32(p); /* --- Definitions for round functions --- */ @@ -80,7 +63,7 @@ void sha_compress(sha_ctx *ctx, const void *sbuf) #define H(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) #define T(v, w, x, y, z, i, f, k) do { \ - z = ROL32(v, 5) + f(w, x, y) + z + buf[i] + k; \ + z = ROL32(v, 5) + f(w, x, y) + z + m[i] + k; \ w = ROR32(w, 2); \ } while (0) @@ -89,99 +72,99 @@ void sha_compress(sha_ctx *ctx, const void *sbuf) #define HH(v, w, x, y, z, i) T(v, w, x, y, z, i, H, 0x8f1bbcdc) #define II(v, w, x, y, z, i) T(v, w, x, y, z, i, G, 0xca62c1d6) + /* --- Message scheduling --- */ + +#define M(i, i3, i8, i14) do { \ + uint32 t = m[i] ^ m[i3] ^ m[i8] ^ m[i14]; \ + m[i] = ROL32(t, 1); \ +} while (0) + /* --- The main compression function --- */ - FF(a, b, c, d, e, 0); - FF(e, a, b, c, d, 1); - FF(d, e, a, b, c, 2); - FF(c, d, e, a, b, 3); - FF(b, c, d, e, a, 4); - FF(a, b, c, d, e, 5); - FF(e, a, b, c, d, 6); - FF(d, e, a, b, c, 7); - FF(c, d, e, a, b, 8); - FF(b, c, d, e, a, 9); - FF(a, b, c, d, e, 10); - FF(e, a, b, c, d, 11); - FF(d, e, a, b, c, 12); - FF(c, d, e, a, b, 13); - FF(b, c, d, e, a, 14); - FF(a, b, c, d, e, 15); - FF(e, a, b, c, d, 16); - FF(d, e, a, b, c, 17); - FF(c, d, e, a, b, 18); - FF(b, c, d, e, a, 19); - - GG(a, b, c, d, e, 20); - GG(e, a, b, c, d, 21); - GG(d, e, a, b, c, 22); - GG(c, d, e, a, b, 23); - GG(b, c, d, e, a, 24); - GG(a, b, c, d, e, 25); - GG(e, a, b, c, d, 26); - GG(d, e, a, b, c, 27); - GG(c, d, e, a, b, 28); - GG(b, c, d, e, a, 29); - GG(a, b, c, d, e, 30); - GG(e, a, b, c, d, 31); - GG(d, e, a, b, c, 32); - GG(c, d, e, a, b, 33); - GG(b, c, d, e, a, 34); - GG(a, b, c, d, e, 35); - GG(e, a, b, c, d, 36); - GG(d, e, a, b, c, 37); - GG(c, d, e, a, b, 38); - GG(b, c, d, e, a, 39); - - HH(a, b, c, d, e, 40); - HH(e, a, b, c, d, 41); - HH(d, e, a, b, c, 42); - HH(c, d, e, a, b, 43); - HH(b, c, d, e, a, 44); - HH(a, b, c, d, e, 45); - HH(e, a, b, c, d, 46); - HH(d, e, a, b, c, 47); - HH(c, d, e, a, b, 48); - HH(b, c, d, e, a, 49); - HH(a, b, c, d, e, 50); - HH(e, a, b, c, d, 51); - HH(d, e, a, b, c, 52); - HH(c, d, e, a, b, 53); - HH(b, c, d, e, a, 54); - HH(a, b, c, d, e, 55); - HH(e, a, b, c, d, 56); - HH(d, e, a, b, c, 57); - HH(c, d, e, a, b, 58); - HH(b, c, d, e, a, 59); - - II(a, b, c, d, e, 60); - II(e, a, b, c, d, 61); - II(d, e, a, b, c, 62); - II(c, d, e, a, b, 63); - II(b, c, d, e, a, 64); - II(a, b, c, d, e, 65); - II(e, a, b, c, d, 66); - II(d, e, a, b, c, 67); - II(c, d, e, a, b, 68); - II(b, c, d, e, a, 69); - II(a, b, c, d, e, 70); - II(e, a, b, c, d, 71); - II(d, e, a, b, c, 72); - II(c, d, e, a, b, 73); - II(b, c, d, e, a, 74); - II(a, b, c, d, e, 75); - II(e, a, b, c, d, 76); - II(d, e, a, b, c, 77); - II(c, d, e, a, b, 78); - II(b, c, d, e, a, 79); + FF(a, b, c, d, e, 0); M( 0, 13, 8, 2); + FF(e, a, b, c, d, 1); M( 1, 14, 9, 3); + FF(d, e, a, b, c, 2); M( 2, 15, 10, 4); + FF(c, d, e, a, b, 3); M( 3, 0, 11, 5); + FF(b, c, d, e, a, 4); M( 4, 1, 12, 6); + FF(a, b, c, d, e, 5); M( 5, 2, 13, 7); + FF(e, a, b, c, d, 6); M( 6, 3, 14, 8); + FF(d, e, a, b, c, 7); M( 7, 4, 15, 9); + FF(c, d, e, a, b, 8); M( 8, 5, 0, 10); + FF(b, c, d, e, a, 9); M( 9, 6, 1, 11); + FF(a, b, c, d, e, 10); M(10, 7, 2, 12); + FF(e, a, b, c, d, 11); M(11, 8, 3, 13); + FF(d, e, a, b, c, 12); M(12, 9, 4, 14); + FF(c, d, e, a, b, 13); M(13, 10, 5, 15); + FF(b, c, d, e, a, 14); M(14, 11, 6, 0); + FF(a, b, c, d, e, 15); M(15, 12, 7, 1); + FF(e, a, b, c, d, 0); M( 0, 13, 8, 2); + FF(d, e, a, b, c, 1); M( 1, 14, 9, 3); + FF(c, d, e, a, b, 2); M( 2, 15, 10, 4); + FF(b, c, d, e, a, 3); M( 3, 0, 11, 5); + GG(a, b, c, d, e, 4); M( 4, 1, 12, 6); + GG(e, a, b, c, d, 5); M( 5, 2, 13, 7); + GG(d, e, a, b, c, 6); M( 6, 3, 14, 8); + GG(c, d, e, a, b, 7); M( 7, 4, 15, 9); + GG(b, c, d, e, a, 8); M( 8, 5, 0, 10); + GG(a, b, c, d, e, 9); M( 9, 6, 1, 11); + GG(e, a, b, c, d, 10); M(10, 7, 2, 12); + GG(d, e, a, b, c, 11); M(11, 8, 3, 13); + GG(c, d, e, a, b, 12); M(12, 9, 4, 14); + GG(b, c, d, e, a, 13); M(13, 10, 5, 15); + GG(a, b, c, d, e, 14); M(14, 11, 6, 0); + GG(e, a, b, c, d, 15); M(15, 12, 7, 1); + GG(d, e, a, b, c, 0); M( 0, 13, 8, 2); + GG(c, d, e, a, b, 1); M( 1, 14, 9, 3); + GG(b, c, d, e, a, 2); M( 2, 15, 10, 4); + GG(a, b, c, d, e, 3); M( 3, 0, 11, 5); + GG(e, a, b, c, d, 4); M( 4, 1, 12, 6); + GG(d, e, a, b, c, 5); M( 5, 2, 13, 7); + GG(c, d, e, a, b, 6); M( 6, 3, 14, 8); + GG(b, c, d, e, a, 7); M( 7, 4, 15, 9); + HH(a, b, c, d, e, 8); M( 8, 5, 0, 10); + HH(e, a, b, c, d, 9); M( 9, 6, 1, 11); + HH(d, e, a, b, c, 10); M(10, 7, 2, 12); + HH(c, d, e, a, b, 11); M(11, 8, 3, 13); + HH(b, c, d, e, a, 12); M(12, 9, 4, 14); + HH(a, b, c, d, e, 13); M(13, 10, 5, 15); + HH(e, a, b, c, d, 14); M(14, 11, 6, 0); + HH(d, e, a, b, c, 15); M(15, 12, 7, 1); + HH(c, d, e, a, b, 0); M( 0, 13, 8, 2); + HH(b, c, d, e, a, 1); M( 1, 14, 9, 3); + HH(a, b, c, d, e, 2); M( 2, 15, 10, 4); + HH(e, a, b, c, d, 3); M( 3, 0, 11, 5); + HH(d, e, a, b, c, 4); M( 4, 1, 12, 6); + HH(c, d, e, a, b, 5); M( 5, 2, 13, 7); + HH(b, c, d, e, a, 6); M( 6, 3, 14, 8); + HH(a, b, c, d, e, 7); M( 7, 4, 15, 9); + HH(e, a, b, c, d, 8); M( 8, 5, 0, 10); + HH(d, e, a, b, c, 9); M( 9, 6, 1, 11); + HH(c, d, e, a, b, 10); M(10, 7, 2, 12); + HH(b, c, d, e, a, 11); M(11, 8, 3, 13); + II(a, b, c, d, e, 12); M(12, 9, 4, 14); + II(e, a, b, c, d, 13); M(13, 10, 5, 15); + II(d, e, a, b, c, 14); M(14, 11, 6, 0); + II(c, d, e, a, b, 15); M(15, 12, 7, 1); + II(b, c, d, e, a, 0); + II(a, b, c, d, e, 1); + II(e, a, b, c, d, 2); + II(d, e, a, b, c, 3); + II(c, d, e, a, b, 4); + II(b, c, d, e, a, 5); + II(a, b, c, d, e, 6); + II(e, a, b, c, d, 7); + II(d, e, a, b, c, 8); + II(c, d, e, a, b, 9); + II(b, c, d, e, a, 10); + II(a, b, c, d, e, 11); + II(e, a, b, c, d, 12); + II(d, e, a, b, c, 13); + II(c, d, e, a, b, 14); + II(b, c, d, e, a, 15); /* --- Update the chaining variables --- */ - ctx->a += a; - ctx->b += b; - ctx->c += c; - ctx->d += d; - ctx->e += e; + ctx->a += a; ctx->b += b; ctx->c += c; ctx->d += d; ctx->e += e; } /* --- @sha_init@ --- * diff --git a/symm/sha256.c b/symm/sha256.c index cecab254..5de3966d 100644 --- a/symm/sha256.c +++ b/symm/sha256.c @@ -49,18 +49,13 @@ void sha256_compress(sha256_ctx *ctx, const void *sbuf) { uint32 a, b, c, d, e, f, g, h; - uint32 buf[64]; + uint32 m[16]; + const octet *p; + int i; - /* --- Fetch the chaining variables --- */ - - a = ctx->a; - b = ctx->b; - c = ctx->c; - d = ctx->d; - e = ctx->e; - f = ctx->f; - g = ctx->g; - h = ctx->h; + a = ctx->a; b = ctx->b; c = ctx->c; d = ctx->d; + e = ctx->e; f = ctx->f; g = ctx->g; h = ctx->h; + for (p = sbuf, i = 0; i < 16; i++, p += 4) m[i] = LOAD32(p); /* --- Definitions for round functions --- */ @@ -73,100 +68,85 @@ void sha256_compress(sha256_ctx *ctx, const void *sbuf) #define s1(x) (ROR32((x), 17) ^ ROR32((x), 19) ^ LSR32((x), 10)) #define T(a, b, c, d, e, f, g, h, i, k) do { \ - uint32 t1 = h + S1(e) + CH(e, f, g) + k + buf[i]; \ + uint32 t1 = h + S1(e) + CH(e, f, g) + k + m[i]; \ uint32 t2 = S0(a) + MAJ(a, b, c); \ d += t1; h = t1 + t2; \ } while (0) - /* --- Fetch and expand the buffer contents --- */ - - { - int i; - const octet *p; - - for (i = 0, p = sbuf; i < 16; i++, p += 4) - buf[i] = LOAD32(p); - for (i = 16; i < 64; i++) - buf[i] = s1(buf[i - 2]) + buf[i - 7] + s0(buf[i - 15]) + buf[i - 16]; - } +#define M(i, i2, i7, i15) \ + do { m[i] += s1(m[i2]) + m[i7] + s0(m[i15]); } while (0) /* --- The main compression function --- */ - T(a, b, c, d, e, f, g, h, 0, 0x428a2f98); - T(h, a, b, c, d, e, f, g, 1, 0x71374491); - T(g, h, a, b, c, d, e, f, 2, 0xb5c0fbcf); - T(f, g, h, a, b, c, d, e, 3, 0xe9b5dba5); - T(e, f, g, h, a, b, c, d, 4, 0x3956c25b); - T(d, e, f, g, h, a, b, c, 5, 0x59f111f1); - T(c, d, e, f, g, h, a, b, 6, 0x923f82a4); - T(b, c, d, e, f, g, h, a, 7, 0xab1c5ed5); - T(a, b, c, d, e, f, g, h, 8, 0xd807aa98); - T(h, a, b, c, d, e, f, g, 9, 0x12835b01); - T(g, h, a, b, c, d, e, f, 10, 0x243185be); - T(f, g, h, a, b, c, d, e, 11, 0x550c7dc3); - T(e, f, g, h, a, b, c, d, 12, 0x72be5d74); - T(d, e, f, g, h, a, b, c, 13, 0x80deb1fe); - T(c, d, e, f, g, h, a, b, 14, 0x9bdc06a7); - T(b, c, d, e, f, g, h, a, 15, 0xc19bf174); - T(a, b, c, d, e, f, g, h, 16, 0xe49b69c1); - T(h, a, b, c, d, e, f, g, 17, 0xefbe4786); - T(g, h, a, b, c, d, e, f, 18, 0x0fc19dc6); - T(f, g, h, a, b, c, d, e, 19, 0x240ca1cc); - T(e, f, g, h, a, b, c, d, 20, 0x2de92c6f); - T(d, e, f, g, h, a, b, c, 21, 0x4a7484aa); - T(c, d, e, f, g, h, a, b, 22, 0x5cb0a9dc); - T(b, c, d, e, f, g, h, a, 23, 0x76f988da); - T(a, b, c, d, e, f, g, h, 24, 0x983e5152); - T(h, a, b, c, d, e, f, g, 25, 0xa831c66d); - T(g, h, a, b, c, d, e, f, 26, 0xb00327c8); - T(f, g, h, a, b, c, d, e, 27, 0xbf597fc7); - T(e, f, g, h, a, b, c, d, 28, 0xc6e00bf3); - T(d, e, f, g, h, a, b, c, 29, 0xd5a79147); - T(c, d, e, f, g, h, a, b, 30, 0x06ca6351); - T(b, c, d, e, f, g, h, a, 31, 0x14292967); - T(a, b, c, d, e, f, g, h, 32, 0x27b70a85); - T(h, a, b, c, d, e, f, g, 33, 0x2e1b2138); - T(g, h, a, b, c, d, e, f, 34, 0x4d2c6dfc); - T(f, g, h, a, b, c, d, e, 35, 0x53380d13); - T(e, f, g, h, a, b, c, d, 36, 0x650a7354); - T(d, e, f, g, h, a, b, c, 37, 0x766a0abb); - T(c, d, e, f, g, h, a, b, 38, 0x81c2c92e); - T(b, c, d, e, f, g, h, a, 39, 0x92722c85); - T(a, b, c, d, e, f, g, h, 40, 0xa2bfe8a1); - T(h, a, b, c, d, e, f, g, 41, 0xa81a664b); - T(g, h, a, b, c, d, e, f, 42, 0xc24b8b70); - T(f, g, h, a, b, c, d, e, 43, 0xc76c51a3); - T(e, f, g, h, a, b, c, d, 44, 0xd192e819); - T(d, e, f, g, h, a, b, c, 45, 0xd6990624); - T(c, d, e, f, g, h, a, b, 46, 0xf40e3585); - T(b, c, d, e, f, g, h, a, 47, 0x106aa070); - T(a, b, c, d, e, f, g, h, 48, 0x19a4c116); - T(h, a, b, c, d, e, f, g, 49, 0x1e376c08); - T(g, h, a, b, c, d, e, f, 50, 0x2748774c); - T(f, g, h, a, b, c, d, e, 51, 0x34b0bcb5); - T(e, f, g, h, a, b, c, d, 52, 0x391c0cb3); - T(d, e, f, g, h, a, b, c, 53, 0x4ed8aa4a); - T(c, d, e, f, g, h, a, b, 54, 0x5b9cca4f); - T(b, c, d, e, f, g, h, a, 55, 0x682e6ff3); - T(a, b, c, d, e, f, g, h, 56, 0x748f82ee); - T(h, a, b, c, d, e, f, g, 57, 0x78a5636f); - T(g, h, a, b, c, d, e, f, 58, 0x84c87814); - T(f, g, h, a, b, c, d, e, 59, 0x8cc70208); - T(e, f, g, h, a, b, c, d, 60, 0x90befffa); - T(d, e, f, g, h, a, b, c, 61, 0xa4506ceb); - T(c, d, e, f, g, h, a, b, 62, 0xbef9a3f7); - T(b, c, d, e, f, g, h, a, 63, 0xc67178f2); + T(a, b, c, d, e, f, g, h, 0, 0x428a2f98); M( 0, 14, 9, 1); + T(h, a, b, c, d, e, f, g, 1, 0x71374491); M( 1, 15, 10, 2); + T(g, h, a, b, c, d, e, f, 2, 0xb5c0fbcf); M( 2, 0, 11, 3); + T(f, g, h, a, b, c, d, e, 3, 0xe9b5dba5); M( 3, 1, 12, 4); + T(e, f, g, h, a, b, c, d, 4, 0x3956c25b); M( 4, 2, 13, 5); + T(d, e, f, g, h, a, b, c, 5, 0x59f111f1); M( 5, 3, 14, 6); + T(c, d, e, f, g, h, a, b, 6, 0x923f82a4); M( 6, 4, 15, 7); + T(b, c, d, e, f, g, h, a, 7, 0xab1c5ed5); M( 7, 5, 0, 8); + T(a, b, c, d, e, f, g, h, 8, 0xd807aa98); M( 8, 6, 1, 9); + T(h, a, b, c, d, e, f, g, 9, 0x12835b01); M( 9, 7, 2, 10); + T(g, h, a, b, c, d, e, f, 10, 0x243185be); M(10, 8, 3, 11); + T(f, g, h, a, b, c, d, e, 11, 0x550c7dc3); M(11, 9, 4, 12); + T(e, f, g, h, a, b, c, d, 12, 0x72be5d74); M(12, 10, 5, 13); + T(d, e, f, g, h, a, b, c, 13, 0x80deb1fe); M(13, 11, 6, 14); + T(c, d, e, f, g, h, a, b, 14, 0x9bdc06a7); M(14, 12, 7, 15); + T(b, c, d, e, f, g, h, a, 15, 0xc19bf174); M(15, 13, 8, 0); + T(a, b, c, d, e, f, g, h, 0, 0xe49b69c1); M( 0, 14, 9, 1); + T(h, a, b, c, d, e, f, g, 1, 0xefbe4786); M( 1, 15, 10, 2); + T(g, h, a, b, c, d, e, f, 2, 0x0fc19dc6); M( 2, 0, 11, 3); + T(f, g, h, a, b, c, d, e, 3, 0x240ca1cc); M( 3, 1, 12, 4); + T(e, f, g, h, a, b, c, d, 4, 0x2de92c6f); M( 4, 2, 13, 5); + T(d, e, f, g, h, a, b, c, 5, 0x4a7484aa); M( 5, 3, 14, 6); + T(c, d, e, f, g, h, a, b, 6, 0x5cb0a9dc); M( 6, 4, 15, 7); + T(b, c, d, e, f, g, h, a, 7, 0x76f988da); M( 7, 5, 0, 8); + T(a, b, c, d, e, f, g, h, 8, 0x983e5152); M( 8, 6, 1, 9); + T(h, a, b, c, d, e, f, g, 9, 0xa831c66d); M( 9, 7, 2, 10); + T(g, h, a, b, c, d, e, f, 10, 0xb00327c8); M(10, 8, 3, 11); + T(f, g, h, a, b, c, d, e, 11, 0xbf597fc7); M(11, 9, 4, 12); + T(e, f, g, h, a, b, c, d, 12, 0xc6e00bf3); M(12, 10, 5, 13); + T(d, e, f, g, h, a, b, c, 13, 0xd5a79147); M(13, 11, 6, 14); + T(c, d, e, f, g, h, a, b, 14, 0x06ca6351); M(14, 12, 7, 15); + T(b, c, d, e, f, g, h, a, 15, 0x14292967); M(15, 13, 8, 0); + T(a, b, c, d, e, f, g, h, 0, 0x27b70a85); M( 0, 14, 9, 1); + T(h, a, b, c, d, e, f, g, 1, 0x2e1b2138); M( 1, 15, 10, 2); + T(g, h, a, b, c, d, e, f, 2, 0x4d2c6dfc); M( 2, 0, 11, 3); + T(f, g, h, a, b, c, d, e, 3, 0x53380d13); M( 3, 1, 12, 4); + T(e, f, g, h, a, b, c, d, 4, 0x650a7354); M( 4, 2, 13, 5); + T(d, e, f, g, h, a, b, c, 5, 0x766a0abb); M( 5, 3, 14, 6); + T(c, d, e, f, g, h, a, b, 6, 0x81c2c92e); M( 6, 4, 15, 7); + T(b, c, d, e, f, g, h, a, 7, 0x92722c85); M( 7, 5, 0, 8); + T(a, b, c, d, e, f, g, h, 8, 0xa2bfe8a1); M( 8, 6, 1, 9); + T(h, a, b, c, d, e, f, g, 9, 0xa81a664b); M( 9, 7, 2, 10); + T(g, h, a, b, c, d, e, f, 10, 0xc24b8b70); M(10, 8, 3, 11); + T(f, g, h, a, b, c, d, e, 11, 0xc76c51a3); M(11, 9, 4, 12); + T(e, f, g, h, a, b, c, d, 12, 0xd192e819); M(12, 10, 5, 13); + T(d, e, f, g, h, a, b, c, 13, 0xd6990624); M(13, 11, 6, 14); + T(c, d, e, f, g, h, a, b, 14, 0xf40e3585); M(14, 12, 7, 15); + T(b, c, d, e, f, g, h, a, 15, 0x106aa070); M(15, 13, 8, 0); + T(a, b, c, d, e, f, g, h, 0, 0x19a4c116); + T(h, a, b, c, d, e, f, g, 1, 0x1e376c08); + T(g, h, a, b, c, d, e, f, 2, 0x2748774c); + T(f, g, h, a, b, c, d, e, 3, 0x34b0bcb5); + T(e, f, g, h, a, b, c, d, 4, 0x391c0cb3); + T(d, e, f, g, h, a, b, c, 5, 0x4ed8aa4a); + T(c, d, e, f, g, h, a, b, 6, 0x5b9cca4f); + T(b, c, d, e, f, g, h, a, 7, 0x682e6ff3); + T(a, b, c, d, e, f, g, h, 8, 0x748f82ee); + T(h, a, b, c, d, e, f, g, 9, 0x78a5636f); + T(g, h, a, b, c, d, e, f, 10, 0x84c87814); + T(f, g, h, a, b, c, d, e, 11, 0x8cc70208); + T(e, f, g, h, a, b, c, d, 12, 0x90befffa); + T(d, e, f, g, h, a, b, c, 13, 0xa4506ceb); + T(c, d, e, f, g, h, a, b, 14, 0xbef9a3f7); + T(b, c, d, e, f, g, h, a, 15, 0xc67178f2); /* --- Update the chaining variables --- */ - ctx->a += a; - ctx->b += b; - ctx->c += c; - ctx->d += d; - ctx->e += e; - ctx->f += f; - ctx->g += g; - ctx->h += h; + ctx->a += a; ctx->b += b; ctx->c += c; ctx->d += d; + ctx->e += e; ctx->f += f; ctx->g += g; ctx->h += h; } /* --- @sha256_init@, @sha224_init@ --- * diff --git a/symm/sha512.c b/symm/sha512.c index ed022cac..a9a5180c 100644 --- a/symm/sha512.c +++ b/symm/sha512.c @@ -49,7 +49,9 @@ void sha512_compress(sha512_ctx *ctx, const void *sbuf) { kludge64 a, b, c, d, e, f, g, h; - kludge64 buf[80]; + kludge64 m[16]; + const kludge64 *k; + const octet *p; int i; static const kludge64 K[80] = { @@ -95,16 +97,9 @@ void sha512_compress(sha512_ctx *ctx, const void *sbuf) X64(5fcb6fab, 3ad6faec), X64(6c44198c, 4a475817) }; - /* --- Fetch the chaining variables --- */ - - a = ctx->a; - b = ctx->b; - c = ctx->c; - d = ctx->d; - e = ctx->e; - f = ctx->f; - g = ctx->g; - h = ctx->h; + a = ctx->a; b = ctx->b; c = ctx->c; d = ctx->d; + e = ctx->e; f = ctx->f; g = ctx->g; h = ctx->h; + for (p = sbuf, i = 0; i < 16; i++, p += 8) LOAD64_(m[i], p); /* --- Definitions for round functions --- */ @@ -130,50 +125,63 @@ void sha512_compress(sha512_ctx *ctx, const void *sbuf) #define T(a, b, c, d, e, f, g, h, i) do { \ kludge64 t1, t2, x; \ - ADD64(t1, buf[i], K[i]); ADD64(t1, t1, h); \ + ADD64(t1, m[i], k[i]); ADD64(t1, t1, h); \ S1(x, e); ADD64(t1, t1, x); CH(x, e, f, g); ADD64(t1, t1, x); \ S0(t2, a); MAJ(x, a, b, c); ADD64(t2, t2, x); \ ADD64(d, d, t1); ADD64(h, t1, t2); \ } while (0) - /* --- Fetch and expand the buffer contents --- */ - - { - const octet *p; - - for (i = 0, p = sbuf; i < 16; i++, p += 8) - LOAD64_(buf[i], p); - for (i = 16; i < 80; i++) { - kludge64 x; - buf[i] = buf[i - 7]; s1(x, buf[i - 2]); ADD64(buf[i], buf[i], x); - s0(x, buf[i - 15]); ADD64(buf[i], buf[i], x); - ADD64(buf[i], buf[i], buf[i - 16]); - } - } +#define M(i, i2, i7, i15) do { \ + kludge64 t; \ + ADD64(m[i], m[i], m[i7]); \ + s1(t, m[i2]); ADD64(m[i], m[i], t); \ + s0(t, m[i15]); ADD64(m[i], m[i], t); \ +} while (0) /* --- The main compression function --- */ - for (i = 0; i < 80; i += 8) { - T(a, b, c, d, e, f, g, h, i + 0); - T(h, a, b, c, d, e, f, g, i + 1); - T(g, h, a, b, c, d, e, f, i + 2); - T(f, g, h, a, b, c, d, e, i + 3); - T(e, f, g, h, a, b, c, d, i + 4); - T(d, e, f, g, h, a, b, c, i + 5); - T(c, d, e, f, g, h, a, b, i + 6); - T(b, c, d, e, f, g, h, a, i + 7); + for (i = 0, k = K; i < 64; i += 16, k += 16) { + T(a, b, c, d, e, f, g, h, 0); M( 0, 14, 9, 1); + T(h, a, b, c, d, e, f, g, 1); M( 1, 15, 10, 2); + T(g, h, a, b, c, d, e, f, 2); M( 2, 0, 11, 3); + T(f, g, h, a, b, c, d, e, 3); M( 3, 1, 12, 4); + T(e, f, g, h, a, b, c, d, 4); M( 4, 2, 13, 5); + T(d, e, f, g, h, a, b, c, 5); M( 5, 3, 14, 6); + T(c, d, e, f, g, h, a, b, 6); M( 6, 4, 15, 7); + T(b, c, d, e, f, g, h, a, 7); M( 7, 5, 0, 8); + T(a, b, c, d, e, f, g, h, 8); M( 8, 6, 1, 9); + T(h, a, b, c, d, e, f, g, 9); M( 9, 7, 2, 10); + T(g, h, a, b, c, d, e, f, 10); M(10, 8, 3, 11); + T(f, g, h, a, b, c, d, e, 11); M(11, 9, 4, 12); + T(e, f, g, h, a, b, c, d, 12); M(12, 10, 5, 13); + T(d, e, f, g, h, a, b, c, 13); M(13, 11, 6, 14); + T(c, d, e, f, g, h, a, b, 14); M(14, 12, 7, 15); + T(b, c, d, e, f, g, h, a, 15); M(15, 13, 8, 0); } + T(a, b, c, d, e, f, g, h, 0); + T(h, a, b, c, d, e, f, g, 1); + T(g, h, a, b, c, d, e, f, 2); + T(f, g, h, a, b, c, d, e, 3); + T(e, f, g, h, a, b, c, d, 4); + T(d, e, f, g, h, a, b, c, 5); + T(c, d, e, f, g, h, a, b, 6); + T(b, c, d, e, f, g, h, a, 7); + T(a, b, c, d, e, f, g, h, 8); + T(h, a, b, c, d, e, f, g, 9); + T(g, h, a, b, c, d, e, f, 10); + T(f, g, h, a, b, c, d, e, 11); + T(e, f, g, h, a, b, c, d, 12); + T(d, e, f, g, h, a, b, c, 13); + T(c, d, e, f, g, h, a, b, 14); + T(b, c, d, e, f, g, h, a, 15); + /* --- Update the chaining variables --- */ - ADD64(ctx->a, ctx->a, a); - ADD64(ctx->b, ctx->b, b); - ADD64(ctx->c, ctx->c, c); - ADD64(ctx->d, ctx->d, d); - ADD64(ctx->e, ctx->e, e); - ADD64(ctx->f, ctx->f, f); - ADD64(ctx->g, ctx->g, g); - ADD64(ctx->h, ctx->h, h); + ADD64(ctx->a, ctx->a, a); ADD64(ctx->b, ctx->b, b); + ADD64(ctx->c, ctx->c, c); ADD64(ctx->d, ctx->d, d); + ADD64(ctx->e, ctx->e, e); ADD64(ctx->f, ctx->f, f); + ADD64(ctx->g, ctx->g, g); ADD64(ctx->h, ctx->h, h); } /* --- @sha512_init@, etc. --- * -- [mdw]