From: Mark Wooding Date: Sat, 30 Jul 2016 10:48:16 +0000 (+0100) Subject: symm/rijndael-arm-crypto.S: More aggressive loading of subkey data. X-Git-Tag: 2.3.0~46 X-Git-Url: http://www.chiark.greenend.org.uk/ucgi/~mdw/git/catacomb/commitdiff_plain/6717fd12f186b68e677fb83f647092cd2407284c?hp=f71dd54d995dcffeb73fb41132a60b6ae62d2ea3 symm/rijndael-arm-crypto.S: More aggressive loading of subkey data. Rewrite the block-encryption primitives so that they load key data in multiple-round chunks. There's now a separate prefix piece for each number of rounds other than ten which does the extra rounds and flows into the main sequence. Because the code is now rather more complicated, there's only one copy of it, in a macro, as for the AESNI version. --- diff --git a/symm/rijndael-arm-crypto.S b/symm/rijndael-arm-crypto.S index 908faaa4..49aa0166 100644 --- a/symm/rijndael-arm-crypto.S +++ b/symm/rijndael-arm-crypto.S @@ -227,7 +227,8 @@ ENDFUNC ///-------------------------------------------------------------------------- /// Encrypting and decrypting blocks. -FUNC(rijndael_eblk_arm_crypto) +.macro encdec op, aes, mc, koff + FUNC(rijndael_\op\()_arm_crypto) // Arguments: // r0 = pointer to context @@ -236,77 +237,95 @@ FUNC(rijndael_eblk_arm_crypto) // Set things up ready. ldr r3, [r0, #nr] - add r0, r0, #w + add r0, r0, #\koff vldmia r1, {d0, d1} vrev32.8 q0, q0 - // Dispatch according to the number of rounds. - add r3, r3, r3, lsl #1 - rsbs r3, r3, #3*14 - addcs pc, pc, r3, lsl #2 + // Check the number of rounds and dispatch. + sub r3, r3, #10 + cmp r3, #5 + addlo pc, pc, r3, lsl #2 callext F(abort) - // The last round doesn't have MixColumns, so do it separately. - .rept 13 - vldmia r0!, {d2, d3} - aese.8 q0, q1 - aesmc.8 q0, q0 - .endr - - // Final round. - vldmia r0!, {d2, d3} - aese.8 q0, q1 - - // Final whitening. - vldmia r0!, {d2, d3} - veor q0, q1 + b 10f + b 11f + b 12f + b 13f + b 14f + + // Eleven rounds. 
+11: vldmia r0!, {d16, d17} + \aes\().8 q0, q8 + \mc\().8 q0, q0 + b 10f + + // Twelve rounds. +12: vldmia r0!, {d16-d19} + \aes\().8 q0, q8 + \mc\().8 q0, q0 + \aes\().8 q0, q9 + \mc\().8 q0, q0 + b 10f + + // Thirteen rounds. +13: vldmia r0!, {d16-d21} + \aes\().8 q0, q8 + \mc\().8 q0, q0 + \aes\().8 q0, q9 + \mc\().8 q0, q0 + \aes\().8 q0, q10 + \mc\().8 q0, q0 + b 10f + + // Fourteen rounds. (Drops through to the ten round case because + // this is the next most common.) +14: vldmia r0!, {d16-d23} + \aes\().8 q0, q8 + \mc\().8 q0, q0 + \aes\().8 q0, q9 + \mc\().8 q0, q0 + \aes\().8 q0, q10 + \mc\().8 q0, q0 + \aes\().8 q0, q11 + \mc\().8 q0, q0 + // Drop through... + + // Ten rounds. +10: vldmia r0!, {d16-d25} + \aes\().8 q0, q8 + \mc\().8 q0, q0 + \aes\().8 q0, q9 + \mc\().8 q0, q0 + \aes\().8 q0, q10 + \mc\().8 q0, q0 + \aes\().8 q0, q11 + \mc\().8 q0, q0 + \aes\().8 q0, q12 + \mc\().8 q0, q0 + + vldmia r0!, {d16-d27} + \aes\().8 q0, q8 + \mc\().8 q0, q0 + \aes\().8 q0, q9 + \mc\().8 q0, q0 + \aes\().8 q0, q10 + \mc\().8 q0, q0 + \aes\().8 q0, q11 + \mc\().8 q0, q0 + + // Final round has no MixColumns, but is followed by final whitening. + \aes\().8 q0, q12 + veor q0, q0, q13 // All done. vrev32.8 q0, q0 vstmia r2, {d0, d1} bx r14 -ENDFUNC - -FUNC(rijndael_dblk_arm_crypto) - - // Arguments: - // r0 = pointer to context - // r1 = pointer to input block - // r2 = pointer to output block - - // Set things up ready. - ldr r3, [r0, #nr] - add r0, r0, #wi - vldmia r1, {d0, d1} - vrev32.8 q0, q0 - - // Dispatch according to the number of rounds. - add r3, r3, r3, lsl #1 - rsbs r3, r3, #3*14 - addcs pc, pc, r3, lsl #2 - callext F(abort) - - // The last round doesn't have MixColumns, so do it separately. - .rept 13 - vldmia r0!, {d2, d3} - aesd.8 q0, q1 - aesimc.8 q0, q0 - .endr - - // Final round. - vldmia r0!, {d2, d3} - aesd.8 q0, q1 - - // Final whitening. - vldmia r0!, {d2, d3} - veor q0, q1 + ENDFUNC +.endm - // All done. 
- vrev32.8 q0, q0 - vstmia r2, {d0, d1} - bx r14 - -ENDFUNC + encdec eblk, aese, aesmc, w + encdec dblk, aesd, aesimc, wi ///----- That's all, folks --------------------------------------------------