1 /// -*- mode: asm; asm-comment-char: ?/ -*-
3 /// Common definitions for assembler source files
5 /// (c) 2015 Straylight/Edgeware
8 ///----- Licensing notice ---------------------------------------------------
10 /// This file is part of Catacomb.
12 /// Catacomb is free software; you can redistribute it and/or modify
13 /// it under the terms of the GNU Library General Public License as
14 /// published by the Free Software Foundation; either version 2 of the
15 /// License, or (at your option) any later version.
17 /// Catacomb is distributed in the hope that it will be useful,
18 /// but WITHOUT ANY WARRANTY; without even the implied warranty of
19 /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 /// GNU Library General Public License for more details.
22 /// You should have received a copy of the GNU Library General Public
23 /// License along with Catacomb; if not, write to the Free
24 /// Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25 /// MA 02111-1307, USA.
27 ///--------------------------------------------------------------------------
28 /// General definitions.
30 // Some useful variables.
33 // Literal pools done the hard way.
//
// Code and literals are kept apart using numbered subsections of `.text',
// tracked by the assembler symbol `.L$_subsec':
//
//   * `_LIT' switches to subsection `.L$_subsec + 1', i.e., the one
//     immediately following the current code subsection; literal data
//     goes here.
//
//   * `_ENDLIT' switches back to the code subsection `.L$_subsec'.
//
//   * `_LTORG' advances `.L$_subsec' by two and switches to the new code
//     subsection, so that literals collected so far via `_LIT' sit between
//     the code written before this point and the code written after it.
//
// This relies on the assembler emitting subsections in increasing numerical
// order.
34 #define _LIT .text .L$_subsec + 1
35 #define _ENDLIT .text .L$_subsec
36 #define _LTORG .L$_subsec = .L$_subsec + 2; .text .L$_subsec
41 # define _SECTTY(ty) %ty
43 # define _SECTTY(ty) @ty
48 #define TEXT .text .L$_subsec
50 # define RODATA .section .rdata, "dr"
52 # define RODATA .section .rodata, "a", _SECTTY(progbits)
58 // Announcing an external function.
62 .macro ENDFUNC; _ENDFUNC(name); .endm; \
67 // Marking the end of a function.
68 #define _ENDFUNC(name) \
74 // Make a helper function, if necessary.
76 .ifndef .L$_auxfn_def.name; \
78 .macro _ENDAUXFN; _ENDAUXFN_TAIL(name); .endm; \
81 #define _ENDAUXFN_TAIL(name) \
84 .L$_auxfn_def.name = 1
85 #define ENDAUXFN _ENDAUXFN; .endif
87 ///--------------------------------------------------------------------------
88 /// ELF-specific hacking.
92 #if __PIC__ || __PIE__
// Record NAME in the symbol table as a function (symbol type `STT_FUNC').
96 #define TYPE_FUNC(name) .type name, STT_FUNC
// Set the recorded size of NAME to the distance from its label to the
// current location: so this must be used immediately after the last byte
// of the object.
98 #define SIZE_OBJ(name) .size name, . - name
102 ///--------------------------------------------------------------------------
103 /// Windows-specific hacking.
108 # define F(name) _##name
113 ///--------------------------------------------------------------------------
114 /// x86- and amd64-specific hacking.
116 /// It's (slightly) easier to deal with both of these in one go.
118 #if CPUFAM_X86 || CPUFAM_AMD64
120 // Set the function hooks.
121 #define FUNC_PREHOOK(_) .balign 16
123 // On Windows, arrange to install stack-unwinding data.
124 #if CPUFAM_AMD64 && ABI_WIN
125 # define FUNC_POSTHOOK(name) .seh_proc name
126 # define ENDFUNC_HOOK(_) .seh_endproc
127 // Procedures are expected to invoke `.seh_setframe' if necessary, and
128 // `.seh_pushreg' and friends, and `.seh_endprologue'.
131 // Don't use the wretched AT&T syntax. It's festooned with pointless
132 // punctuation, and all of the data movement is backwards. Ugh!
133 .intel_syntax noprefix
135 // Call external subroutine at ADDR, possibly via PLT.
144 // Do I need to arrange a spare GOT register?
145 #if WANT_PIC && CPUFAM_X86
148 #define GOTREG ebx // Not needed in AMD64 so don't care.
150 // Maybe load GOT address into GOT.
151 .macro ldgot got=GOTREG
152 #if WANT_PIC && CPUFAM_X86
158 add \got, offset _GLOBAL_OFFSET_TABLE_
162 // Load address of external symbol ADDR into REG, maybe using GOT.
163 .macro leaext reg, addr, got=GOTREG
166 mov \reg, [\got + \addr@GOT]
169 mov \reg, \addr@GOTPCREL[rip]
173 mov \reg, offset \addr
181 // Address expression (possibly using a base register, and a displacement)
182 // referring to ADDR, which is within our module, maybe using GOT.
//
// Usage: `INTADDR(addr)' or `INTADDR(addr, got)'.  The variadic front end
// appends `GOTREG, dummy' to whatever the caller wrote, and `INTADDR__0'
// then keeps only the first two arguments.  Hence GOT defaults to `GOTREG'
// if the caller omits it, and the trailing `dummy' ensures that the `...'
// of `INTADDR__0' is never left empty.
183 #define INTADDR(...) INTADDR__0(__VA_ARGS__, GOTREG, dummy)
184 #define INTADDR__0(addr, got, ...) INTADDR__1(addr, got)
// Three alternative expansions follow: relative to `rip'; relative to the
// GOT register using an `@GOTOFF' displacement; and the plain address.
// NOTE(review): presumably these are selected for amd64, 32-bit x86 PIC,
// and 32-bit x86 non-PIC builds respectively -- confirm against the
// conditionals which guard them.
186 # define INTADDR__1(addr, got) addr + rip
188 # define INTADDR__1(addr, got) got + addr@GOTOFF
190 # define INTADDR__1(addr, got) addr
193 // Permutations for SIMD instructions. SHUF(D, C, B, A) is an immediate,
194 // suitable for use in `pshufd' or `shufps', which copies element D
195 // (0 <= D < 4) of the source to element 3 of the destination, element C to
196 // element 2, element B to element 1, and element A to element 0.
197 #define SHUF(d, c, b, a) (64*(d) + 16*(c) + 4*(b) + (a))
204 // Stash GP registers and establish temporary stack frame.
223 // Print FMT and the other established arguments.
224 lea eax, .L$_reg$msg.\@
264 _reg.3 "\msg: \r = %08x"
272 pshufd xmm0, xmm0, 0x1b
275 _reg.3 "\msg: \r = %08x %08x %08x %08x"
285 _reg.3 "\msg: \r = %08x %08x"
293 fldt [esp + 32 + 16*\i]
296 _reg.3 "\msg: st(\i) = %.20Lg"
304 fldt [esp + 32 + 16*\i]
307 _reg.3 "\msg: st(\i) = %La"
312 ///--------------------------------------------------------------------------
313 /// ARM-specific hacking.
317 // ARM/Thumb mode things. Use ARM by default.
//
// As well as selecting the instruction set, each of these records a
// pc-offset in the assembler symbol `.L$_pcoff': 8 for ARM, 4 for Thumb.
// The pc-relative literals built by the GOT-handling macros subtract
// `.L$_pcoff' from their offsets.  NOTE(review): presumably this is the
// amount by which a read of `pc' runs ahead of the reading instruction's
// own address in each state -- confirm against the architecture manual.
318 #define ARM .arm; .L$_pcoff = 8
319 #define THUMB .thumb; .L$_pcoff = 4
322 // Set the function hooks.
//
// FUNC_PREHOOK ignores its argument and aligns to a 4-byte boundary.
// ENDFUNC_HOOK ignores its argument and issues `.ltorg', asking the
// assembler to dump its pending literal pool (as filled by `ldr REG, =ADDR')
// at this point.  NOTE(review): presumably these are expanded just before a
// function's label and just after its end respectively -- confirm against
// the macros which invoke the hooks.
323 #define FUNC_PREHOOK(_) .balign 4
324 #define ENDFUNC_HOOK(name) .ltorg
326 // Call external subroutine at ADDR, possibly via PLT.
327 .macro callext addr, cond=
335 // Do I need to arrange a spare GOT register?
341 // Maybe load GOT address into GOT.
342 .macro ldgot cond=, got=GOTREG
344 ldr\cond \got, .L$_ldgot$\@
346 add\cond \got, pc, \got
350 .word _GLOBAL_OFFSET_TABLE_ - .L$_ldgot_pc$\@ - .L$_pcoff
355 // Load address of external symbol ADDR into REG, maybe using GOT.
356 .macro leaext reg, addr, cond=, got=GOTREG
358 ldr\cond \reg, .L$_leaext$\@
359 ldr\cond \reg, [\got, \reg]
366 ldr\cond \reg, =\addr
370 // Load address of external symbol ADDR into REG directly.
371 .macro leaextq reg, addr, cond=
373 ldr\cond \reg, .L$_leaextq$\@
376 ldr\cond \reg, [pc, \reg]
379 ldr\cond \reg, [\reg]
384 .word \addr(GOT_PREL) + (. - .L$_leaextq_pc$\@ - .L$_pcoff)
387 ldr\cond \reg, =\addr
393 ///--------------------------------------------------------------------------
396 // Default values for the various hooks.
398 # define FUNC_PREHOOK(name)
400 #ifndef FUNC_POSTHOOK
401 # define FUNC_POSTHOOK(name)
404 # define ENDFUNC_HOOK(name)
408 # define F(name) name
412 # define TYPE_FUNC(name)
416 # define SIZE_OBJ(name)
// Mark the stack as non-executable, unless the includer has defined
// `WANT_EXECUTABLE_STACK'.  A `.note.GNU-stack' section whose flags string
// is empty (in particular, has no `x') is how an ELF object tells the linker
// that it doesn't need an executable stack; an object which lacks the note
// may cause the linker to fall back to an executable stack.  The note must
// therefore be emitted exactly when `WANT_EXECUTABLE_STACK' is *not*
// defined.
419 #if __ELF__ && !defined(WANT_EXECUTABLE_STACK)
420 .pushsection .note.GNU-stack, "", _SECTTY(progbits)
424 ///----- That's all, folks --------------------------------------------------