chiark
/
gitweb
/
~mdw
/
catacomb
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
base/asm-common.h, *-x86ish-*.S: Centralize SSE shuffling constants.
[catacomb]
/
symm
/
salsa20-x86ish-sse2.S
diff --git a/symm/salsa20-x86ish-sse2.S b/symm/salsa20-x86ish-sse2.S
index 930508a15bbc31ec4f77e9af42f639a1568c7e48..5fa5b1516d01c6adeaf9ad857d0d421fd5bbae67 100644
(file)
--- a/symm/salsa20-x86ish-sse2.S
+++ b/symm/salsa20-x86ish-sse2.S
@@ -30,14 +30,6 @@
#include "config.h"
#include "asm-common.h"
#include "config.h"
#include "asm-common.h"
-///--------------------------------------------------------------------------
-/// Local utilities.
-
-// Magic constants for shuffling.
-#define ROTL 0x93
-#define ROT2 0x4e
-#define ROTR 0x39
-
///--------------------------------------------------------------------------
/// Main code.
///--------------------------------------------------------------------------
/// Main code.
@@ -182,7 +174,7 @@ FUNC(salsa20_core_x86ish_sse2)
// d ^= (c + b) <<< 13
movdqa xmm4, xmm2
paddd xmm4, xmm1
// d ^= (c + b) <<< 13
movdqa xmm4, xmm2
paddd xmm4, xmm1
- pshufd xmm1, xmm1, ROTL
+ pshufd xmm1, xmm1, SHUF(2, 1, 0, 3)
movdqa xmm5, xmm4
pslld xmm4, 13
psrld xmm5, 19
movdqa xmm5, xmm4
pslld xmm4, 13
psrld xmm5, 19
@@ -191,9 +183,9 @@ FUNC(salsa20_core_x86ish_sse2)
// a ^= (d + c) <<< 18
movdqa xmm4, xmm3
// a ^= (d + c) <<< 18
movdqa xmm4, xmm3
- pshufd xmm3, xmm3, ROTR
+ pshufd xmm3, xmm3, SHUF(0, 3, 2, 1)
paddd xmm4, xmm2
paddd xmm4, xmm2
- pshufd xmm2, xmm2, ROT2
+ pshufd xmm2, xmm2, SHUF(1, 0, 3, 2)
movdqa xmm5, xmm4
pslld xmm4, 18
psrld xmm5, 14
movdqa xmm5, xmm4
pslld xmm4, 18
psrld xmm5, 14
@@ -237,7 +229,7 @@ FUNC(salsa20_core_x86ish_sse2)
// d ^= (c + b) <<< 13
movdqa xmm4, xmm2
paddd xmm4, xmm3
// d ^= (c + b) <<< 13
movdqa xmm4, xmm2
paddd xmm4, xmm3
- pshufd xmm3, xmm3, ROTL
+ pshufd xmm3, xmm3, SHUF(2, 1, 0, 3)
movdqa xmm5, xmm4
pslld xmm4, 13
psrld xmm5, 19
movdqa xmm5, xmm4
pslld xmm4, 13
psrld xmm5, 19
@@ -246,9 +238,9 @@ FUNC(salsa20_core_x86ish_sse2)
// a ^= (d + c) <<< 18
movdqa xmm4, xmm1
// a ^= (d + c) <<< 18
movdqa xmm4, xmm1
- pshufd xmm1, xmm1, ROTR
+ pshufd xmm1, xmm1, SHUF(0, 3, 2, 1)
paddd xmm4, xmm2
paddd xmm4, xmm2
- pshufd xmm2, xmm2, ROT2
+ pshufd xmm2, xmm2, SHUF(1, 0, 3, 2)
movdqa xmm5, xmm4
pslld xmm4, 18
psrld xmm5, 14
movdqa xmm5, xmm4
pslld xmm4, 18
psrld xmm5, 14
@@ -272,39 +264,39 @@ FUNC(salsa20_core_x86ish_sse2)
movd [OUT + 0], xmm0
paddd xmm1, SAVE1
movd [OUT + 0], xmm0
paddd xmm1, SAVE1
- pshufd xmm5, xmm1, ROTL
+ pshufd xmm5, xmm1, SHUF(2, 1, 0, 3)
movd [OUT + 16], xmm1
paddd xmm2, SAVE2
movd [OUT + 16], xmm1
paddd xmm2, SAVE2
- pshufd xmm6, xmm2, ROT2
+ pshufd xmm6, xmm2, SHUF(1, 0, 3, 2)
movd [OUT + 32], xmm2
paddd xmm3, SAVE3
movd [OUT + 32], xmm2
paddd xmm3, SAVE3
- pshufd xmm7, xmm3, ROTR
+ pshufd xmm7, xmm3, SHUF(0, 3, 2, 1)
movd [OUT + 48], xmm3
movd [OUT + 4], xmm7
movd [OUT + 48], xmm3
movd [OUT + 4], xmm7
- pshufd xmm7, xmm3, ROT2
+ pshufd xmm7, xmm3, SHUF(1, 0, 3, 2)
movd [OUT + 24], xmm7
movd [OUT + 24], xmm7
- pshufd xmm3, xmm3, ROTL
+ pshufd xmm3, xmm3, SHUF(2, 1, 0, 3)
movd [OUT + 44], xmm3
movd [OUT + 8], xmm6
movd [OUT + 44], xmm3
movd [OUT + 8], xmm6
- pshufd xmm6, xmm2, ROTL
+ pshufd xmm6, xmm2, SHUF(2, 1, 0, 3)
movd [OUT + 28], xmm6
movd [OUT + 28], xmm6
- pshufd xmm2, xmm2, ROTR
+ pshufd xmm2, xmm2, SHUF(0, 3, 2, 1)
movd [OUT + 52], xmm2
movd [OUT + 12], xmm5
movd [OUT + 52], xmm2
movd [OUT + 12], xmm5
- pshufd xmm5, xmm1, ROTR
+ pshufd xmm5, xmm1, SHUF(0, 3, 2, 1)
movd [OUT + 36], xmm5
movd [OUT + 36], xmm5
- pshufd xmm1, xmm1, ROT2
+ pshufd xmm1, xmm1, SHUF(1, 0, 3, 2)
movd [OUT + 56], xmm1
movd [OUT + 20], xmm4
movd [OUT + 56], xmm1
movd [OUT + 20], xmm4
- pshufd xmm4, xmm0, ROT2
+ pshufd xmm4, xmm0, SHUF(1, 0, 3, 2)
movd [OUT + 40], xmm4
movd [OUT + 40], xmm4
- pshufd xmm0, xmm0, ROTL
+ pshufd xmm0, xmm0, SHUF(2, 1, 0, 3)
movd [OUT + 60], xmm0
// Tidy things up.
movd [OUT + 60], xmm0
// Tidy things up.