/// Main code.
.arch pentium4
- .section .text
+ .text
FUNC(salsa20_core_x86ish_sse2)
// registers, but we want more than we can use as scratch space. Two
// places we only need to save a copy of the input for the
// feedforward at the end; but the other two we want for the final
- // permutation, so save the old values on the stack (We need an extra
- // 8 bytes to align the stack.)
+ // permutation, so save the old values on the stack. (We need an
+ // extra 8 bytes to align the stack.)
# define NR ecx
# define IN rdx
# define SAVE3 [rsp + 48]
sub rsp, 64 + 8
+ .seh_stackalloc 64 + 8
movdqa [rsp + 0], xmm6
+ .seh_savexmm xmm6, 0
movdqa [rsp + 16], xmm7
+ .seh_savexmm xmm7, 16
+ .seh_endprologue
#endif
// First job is to slurp the matrix into XMM registers. The words
movd [OUT + 60], xmm0
// Tidy things up.
-
#if CPUFAM_X86
mov esp, ebp
pop ebp