*.S: Use `.text' consistently to name the text section.

[catacomb] / symm / salsa20-x86ish-sse2.S
diff --git a/symm/salsa20-x86ish-sse2.S b/symm/salsa20-x86ish-sse2.S

index a168d79a7b0cb91b64de7eb12df42f6006bae4ea..47401b7a769a57da093e0ec9f96497da32d19f92 100644 (file)
--- a/symm/salsa20-x86ish-sse2.S
+++ b/symm/salsa20-x86ish-sse2.S
@@ -42,7 +42,7 @@
  /// Main code.
  
         .arch pentium4
-       .section .text
+       .text
  
  FUNC(salsa20_core_x86ish_sse2)
  
@@ -146,13 +146,13 @@ FUNC(salsa20_core_x86ish_sse2)
         movdqu  xmm2, [IN + 32]
         movdqu  xmm3, [IN + 48]
  
-       ## Take a copy for later.
+       // Take a copy for later.
         movdqa  SAVE0, xmm0
         movdqa  SAVE1, xmm1
         movdqa  SAVE2, xmm2
         movdqa  SAVE3, xmm3
  
-loop:
+0:
         // Apply a column quarterround to each of the columns simultaneously.
         // Alas, there doesn't seem to be a packed doubleword rotate, so we
         // have to synthesize it.
@@ -256,7 +256,7 @@ loop:
         // Decrement the loop counter and see if we should go round again.
         // Later processors fuse this pair into a single uop.
         sub     NR, 2
-       ja      loop
+       ja      0b
  
         // Almost there.  Firstly, the feedforward addition, and then we have
         // to write out the result.  Here we have to undo the permutation