// Main key expansion loop. The first word of each key-length chunk
// needs special treatment.
-9: ldrb r14, [r5], #1 // next round constant
+0: ldrb r14, [r5], #1 // next round constant
ldr r6, [r9, -r3, lsl #2]
vdup.32 q0, r4
aese.8 q0, q1 // effectively, just SubBytes
eor r4, r4, r6
str r4, [r9], #4
cmp r9, r8
- bcs 8f
+ bcs 9f
// The next three words are simple.
ldr r6, [r9, -r3, lsl #2]
eor r4, r4, r6
str r4, [r9], #4
cmp r9, r8
- bcs 8f
+ bcs 9f
// (Word 2...)
ldr r6, [r9, -r3, lsl #2]
eor r4, r4, r6
str r4, [r9], #4
cmp r9, r8
- bcs 8f
+ bcs 9f
// (Word 3...)
ldr r6, [r9, -r3, lsl #2]
eor r4, r4, r6
str r4, [r9], #4
cmp r9, r8
- bcs 8f
+ bcs 9f
// Word 4. If the key is /more/ than 6 words long, then we must
// apply a substitution here.
cmp r3, #5
- bcc 9b
+ bcc 0b
ldr r6, [r9, -r3, lsl #2]
cmp r3, #7
- bcc 0f
+ bcc 1f
vdup.32 q0, r4
aese.8 q0, q1 // effectively, just SubBytes
vmov.32 r4, d0[0]
-0: eor r4, r4, r6
+1: eor r4, r4, r6
str r4, [r9], #4
cmp r9, r8
- bcs 8f
+ bcs 9f
// (Word 5...)
cmp r3, #6
- bcc 9b
+ bcc 0b
ldr r6, [r9, -r3, lsl #2]
eor r4, r4, r6
str r4, [r9], #4
cmp r9, r8
- bcs 8f
+ bcs 9f
// (Word 6...)
cmp r3, #7
- bcc 9b
+ bcc 0b
ldr r6, [r9, -r3, lsl #2]
eor r4, r4, r6
str r4, [r9], #4
cmp r9, r8
- bcs 8f
+ bcs 9f
// (Word 7...)
cmp r3, #8
- bcc 9b
+ bcc 0b
ldr r6, [r9, -r3, lsl #2]
eor r4, r4, r6
str r4, [r9], #4
cmp r9, r8
- bcs 8f
+ bcs 9f
// Must be done by now.
- b 9b
+ b 0b
// Next job is to construct the decryption keys. The keys for the
// first and last rounds don't need to be mangled, but the remaining
// ones do. It's OK if we read or write a little too much here:
// there's easily enough buffer space for the over-enthusiastic reads
// and writes because the context has space for 32-byte blocks, which
// is our maximum and an exact fit for two Q-class registers.
-8: add r5, r0, #wi
+9: add r5, r0, #wi
add r4, r0, #w
add r4, r4, r2, lsl #2
sub r4, r4, r1, lsl #2 // last round's keys
vstmiane r5, {d0-d3}
// Update the loop variables and stop if we've finished.
-9: sub r4, r4, r1, lsl #2
+0: sub r4, r4, r1, lsl #2
add r5, r5, r1, lsl #2
subs r7, r7, #1
- beq 0f
+ beq 9f
// Do another middle round's keys...
teq r1, #4
vldmiane r4, {d0-d3}
aesimc.8 q0, q0
vstmiaeq r5, {d0, d1}
- beq 9b
+ beq 0b
aesimc.8 q1, q1
vstmia r5, {d0-d3}
- b 9b
+ b 0b
// Finally copy the first encryption round's keys (no mangling needed).
-0: teq r1, #4
+9: teq r1, #4
vldmiaeq r4, {d0, d1}
vldmiane r4, {d0-d3}
vstmiaeq r5, {d0, d1}
// If the block size is not exactly four words then we must end-swap
// everything. We can use fancy NEON toys for this.
- beq 0f
+ beq 9f
// End-swap the encryption keys.
add r1, r0, #w
bl endswap_block
// All done.
-0: ldmfd sp!, {r4-r9, pc}
+9: ldmfd sp!, {r4-r9, pc}
endswap_block:
// End-swap R2 words starting at R1. R1 is clobbered; R2 is not.