#include "config.h"
#include "asm-common.h"
+///--------------------------------------------------------------------------
+/// Local utilities.
+
+// Magic constants for shuffling: immediate operands for `pshufd' which
+// rotate the four 32-bit lanes of a vector.  Writing the source lanes as
+// s0 (lowest), s1, s2, s3, the destination lanes d0, d1, d2, d3 receive:
+//
+//   ROTL (0x93):  s3, s0, s1, s2   -- each lane moves up one place
+//   ROT2 (0x4e):  s2, s3, s0, s1   -- the two 64-bit halves swap
+//   ROTR (0x39):  s1, s2, s3, s0   -- each lane moves down one place
+//
+// ROTL and ROTR undo each other; ROT2 is its own inverse.
+#define ROTL 0x93
+#define ROT2 0x4e
+#define ROTR 0x39
+
///--------------------------------------------------------------------------
/// Main code.
// c += d; b ^= c; b <<<= 7
paddd xmm2, xmm3
- pshufd xmm3, xmm3, 0x93
+ pshufd xmm3, xmm3, ROTL
pxor xmm1, xmm2
- pshufd xmm2, xmm2, 0x4e
+ pshufd xmm2, xmm2, ROT2
movdqa xmm4, xmm1
pslld xmm1, 7
psrld xmm4, 25
//
// The shuffles have quite high latency, so they've mostly been
// pushed upwards. The remaining one can't be moved, though.
- pshufd xmm1, xmm1, 0x39
+ pshufd xmm1, xmm1, ROTR
// Apply the diagonal quarterround to each of the columns
// simultaneously.
// c += d; b ^= c; b <<<= 7
paddd xmm2, xmm3
- pshufd xmm3, xmm3, 0x39
+ pshufd xmm3, xmm3, ROTR
pxor xmm1, xmm2
- pshufd xmm2, xmm2, 0x4e
+ pshufd xmm2, xmm2, ROT2
movdqa xmm4, xmm1
pslld xmm1, 7
psrld xmm4, 25
// Finally, finish off undoing the transpose, and we're done for this
// doubleround. Again, most of this was done above so we don't have
// to wait for the shuffles.
- pshufd xmm1, xmm1, 0x93
+ pshufd xmm1, xmm1, ROTL
// Decrement the loop counter and see if we should go round again.
sub ecx, 2
.globl F(abort)
.globl F(rijndael_rcon)
+///--------------------------------------------------------------------------
+/// Local utilities.
+
+// Magic constants for shuffling: immediate operands for `pshufd' which
+// rotate the four 32-bit lanes of a vector.  Writing the source lanes as
+// s0 (lowest), s1, s2, s3, the destination lanes d0, d1, d2, d3 receive:
+//
+//   ROTL (0x93):  s3, s0, s1, s2   -- each lane moves up one place
+//   ROT2 (0x4e):  s2, s3, s0, s1   -- the two 64-bit halves swap
+//   ROTR (0x39):  s1, s2, s3, s0   -- each lane moves down one place
+//
+// ROTL and ROTR undo each other; ROT2 is its own inverse.
+#define ROTL 0x93
+#define ROT2 0x4e
+#define ROTR 0x39
+
///--------------------------------------------------------------------------
/// Main code.
// open-coding the whole thing. It's much easier to leave that as
// zero and XOR in the round constant by hand.
9: movd xmm0, eax
- pshufd xmm0, xmm0, 0x39
+ pshufd xmm0, xmm0, ROTR
aeskeygenassist xmm1, xmm0, 0
- pshufd xmm1, xmm1, 0x93
+ pshufd xmm1, xmm1, ROTL
movd eax, xmm1
xor eax, [esi]
xor al, [ecx]
cmp ebx, 7
jb 0f
movd xmm0, eax
- pshufd xmm0, xmm0, 0x93
+ pshufd xmm0, xmm0, ROTL
aeskeygenassist xmm1, xmm0, 0
movd eax, xmm1
0: xor eax, [esi]
#include "config.h"
#include "asm-common.h"
+///--------------------------------------------------------------------------
+/// Local utilities.
+
+// Magic constants for shuffling: immediate operands for `pshufd' which
+// rotate the four 32-bit lanes of a vector.  Writing the source lanes as
+// s0 (lowest), s1, s2, s3, the destination lanes d0, d1, d2, d3 receive:
+//
+//   ROTL (0x93):  s3, s0, s1, s2   -- each lane moves up one place
+//   ROT2 (0x4e):  s2, s3, s0, s1   -- the two 64-bit halves swap
+//   ROTR (0x39):  s1, s2, s3, s0   -- each lane moves down one place
+//
+// ROTL and ROTR undo each other; ROT2 is its own inverse.
+#define ROTL 0x93
+#define ROT2 0x4e
+#define ROTR 0x39
+
///--------------------------------------------------------------------------
/// Main code.
// d ^= (c + b) <<< 13
movdqa xmm4, xmm2
paddd xmm4, xmm1
- pshufd xmm1, xmm1, 0x93
+ pshufd xmm1, xmm1, ROTL
movdqa xmm5, xmm4
pslld xmm4, 13
psrld xmm5, 19
// a ^= (d + c) <<< 18
movdqa xmm4, xmm3
- pshufd xmm3, xmm3, 0x39
+ pshufd xmm3, xmm3, ROTR
paddd xmm4, xmm2
- pshufd xmm2, xmm2, 0x4e
+ pshufd xmm2, xmm2, ROT2
movdqa xmm5, xmm4
pslld xmm4, 18
psrld xmm5, 14
// d ^= (c + b) <<< 13
movdqa xmm4, xmm2
paddd xmm4, xmm3
- pshufd xmm3, xmm3, 0x93
+ pshufd xmm3, xmm3, ROTL
movdqa xmm5, xmm4
pslld xmm4, 13
psrld xmm5, 19
// a ^= (d + c) <<< 18
movdqa xmm4, xmm1
- pshufd xmm1, xmm1, 0x39
+ pshufd xmm1, xmm1, ROTR
paddd xmm4, xmm2
- pshufd xmm2, xmm2, 0x4e
+ pshufd xmm2, xmm2, ROT2
movdqa xmm5, xmm4
pslld xmm4, 18
psrld xmm5, 14
mov edx, [ebp + 16]
paddd xmm0, [esp + 0]
- pshufd xmm4, xmm0, 0x39
+ pshufd xmm4, xmm0, ROTR
movd [edx + 0], xmm0
paddd xmm1, [esp + 16]
- pshufd xmm5, xmm1, 0x93
+ pshufd xmm5, xmm1, ROTL
movd [edx + 16], xmm1
paddd xmm2, xmm6
- pshufd xmm6, xmm2, 0x4e
+ pshufd xmm6, xmm2, ROT2
movd [edx + 32], xmm2
paddd xmm3, xmm7
- pshufd xmm7, xmm3, 0x39
+ pshufd xmm7, xmm3, ROTR
movd [edx + 48], xmm3
movd [edx + 4], xmm7
- pshufd xmm7, xmm3, 0x4e
+ pshufd xmm7, xmm3, ROT2
movd [edx + 24], xmm7
- pshufd xmm3, xmm3, 0x93
+ pshufd xmm3, xmm3, ROTL
movd [edx + 44], xmm3
movd [edx + 8], xmm6
- pshufd xmm6, xmm2, 0x93
+ pshufd xmm6, xmm2, ROTL
movd [edx + 28], xmm6
- pshufd xmm2, xmm2, 0x39
+ pshufd xmm2, xmm2, ROTR
movd [edx + 52], xmm2
movd [edx + 12], xmm5
- pshufd xmm5, xmm1, 0x39
+ pshufd xmm5, xmm1, ROTR
movd [edx + 36], xmm5
- pshufd xmm1, xmm1, 0x4e
+ pshufd xmm1, xmm1, ROT2
movd [edx + 56], xmm1
movd [edx + 20], xmm4
- pshufd xmm4, xmm0, 0x4e
+ pshufd xmm4, xmm0, ROT2
movd [edx + 40], xmm4
- pshufd xmm0, xmm0, 0x93
+ pshufd xmm0, xmm0, ROTL
movd [edx + 60], xmm0
// Tidy things up.