Diffstat:
 -rw-r--r--   arch/arm/crypto/aes-neonbs-core.S   149
 1 file changed, 83 insertions, 66 deletions
diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S
index cfaed4e67535..7b61032f29fa 100644
--- a/arch/arm/crypto/aes-neonbs-core.S
+++ b/arch/arm/crypto/aes-neonbs-core.S
@@ -77,11 +77,6 @@
 	vldr		\out\()h, \sym + 8
 	.endm
 
-	.macro		__adr, reg, lbl
-	adr		\reg, \lbl
-THUMB(	orr		\reg, \reg, #1	)
-	.endm
-
 	.macro		in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
 	veor		\b2, \b2, \b1
 	veor		\b5, \b5, \b6
@@ -629,11 +624,11 @@ ENDPROC(aesbs_decrypt8)
 	push		{r4-r6, lr}
 	ldr		r5, [sp, #16]		// number of blocks
 
-99:	__adr		ip, 0f
+99:	adr		ip, 0f
 	and		lr, r5, #7
 	cmp		r5, #8
 	sub		ip, ip, lr, lsl #2
-	bxlt		ip			// computed goto if blocks < 8
+	movlt		pc, ip			// computed goto if blocks < 8
 
 	vld1.8		{q0}, [r1]!
 	vld1.8		{q1}, [r1]!
@@ -648,11 +643,11 @@ ENDPROC(aesbs_decrypt8)
 	mov		rounds, r3
 	bl		\do8
 
-	__adr		ip, 1f
+	adr		ip, 1f
 	and		lr, r5, #7
 	cmp		r5, #8
 	sub		ip, ip, lr, lsl #2
-	bxlt		ip			// computed goto if blocks < 8
+	movlt		pc, ip			// computed goto if blocks < 8
 
 	vst1.8		{\o0}, [r0]!
 	vst1.8		{\o1}, [r0]!
@@ -689,12 +684,12 @@ ENTRY(aesbs_cbc_decrypt)
 	push		{r4-r6, lr}
 	ldm		ip, {r5-r6}		// load args 4-5
 
-99:	__adr		ip, 0f
+99:	adr		ip, 0f
 	and		lr, r5, #7
 	cmp		r5, #8
 	sub		ip, ip, lr, lsl #2
 	mov		lr, r1
-	bxlt		ip			// computed goto if blocks < 8
+	movlt		pc, ip			// computed goto if blocks < 8
 
 	vld1.8		{q0}, [lr]!
 	vld1.8		{q1}, [lr]!
@@ -718,11 +713,11 @@ ENTRY(aesbs_cbc_decrypt)
 	vmov		q14, q8
 	vmov		q15, q8
 
-	__adr		ip, 1f
+	adr		ip, 1f
 	and		lr, r5, #7
 	cmp		r5, #8
 	sub		ip, ip, lr, lsl #2
-	bxlt		ip			// computed goto if blocks < 8
+	movlt		pc, ip			// computed goto if blocks < 8
 
 	vld1.8		{q9}, [r1]!
 	vld1.8		{q10}, [r1]!
@@ -733,9 +728,9 @@ ENTRY(aesbs_cbc_decrypt)
 	vld1.8		{q15}, [r1]!
 	W(nop)
 
-1:	__adr		ip, 2f
+1:	adr		ip, 2f
 	sub		ip, ip, lr, lsl #3
-	bxlt		ip			// computed goto if blocks < 8
+	movlt		pc, ip			// computed goto if blocks < 8
 
 	veor		q0, q0, q8
 	vst1.8		{q0}, [r0]!
@@ -763,29 +758,24 @@ ENTRY(aesbs_cbc_decrypt)
 ENDPROC(aesbs_cbc_decrypt)
 
 	.macro		next_ctr, q
-	vmov.32		\q\()h[1], r10
+	vmov		\q\()h, r9, r10
 	adds		r10, r10, #1
-	vmov.32		\q\()h[0], r9
 	adcs		r9, r9, #0
-	vmov.32		\q\()l[1], r8
+	vmov		\q\()l, r7, r8
 	adcs		r8, r8, #0
-	vmov.32		\q\()l[0], r7
 	adc		r7, r7, #0
 	vrev32.8	\q, \q
 	.endm
 
 	/*
 	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
-	 *		     int rounds, int blocks, u8 ctr[], u8 final[])
+	 *		     int rounds, int bytes, u8 ctr[])
 	 */
 ENTRY(aesbs_ctr_encrypt)
 	mov		ip, sp
 	push		{r4-r10, lr}
 
-	ldm		ip, {r5-r7}		// load args 4-6
-	teq		r7, #0
-	addne		r5, r5, #1		// one extra block if final != 0
-
+	ldm		ip, {r5, r6}		// load args 4-5
 	vld1.8		{q0}, [r6]		// load counter
 	vrev32.8	q1, q0
 	vmov		r9, r10, d3
@@ -797,20 +787,19 @@ ENTRY(aesbs_ctr_encrypt)
 	adc		r7, r7, #0
 
 99:	vmov		q1, q0
+	sub		lr, r5, #1
 	vmov		q2, q0
+	adr		ip, 0f
 	vmov		q3, q0
+	and		lr, lr, #112
 	vmov		q4, q0
+	cmp		r5, #112
 	vmov		q5, q0
+	sub		ip, ip, lr, lsl #1
 	vmov		q6, q0
+	add		ip, ip, lr, lsr #2
 	vmov		q7, q0
-
-	__adr		ip, 0f
-	sub		lr, r5, #1
-	and		lr, lr, #7
-	cmp		r5, #8
-	sub		ip, ip, lr, lsl #5
-	sub		ip, ip, lr, lsl #2
-	bxlt		ip			// computed goto if blocks < 8
+	movle		pc, ip			// computed goto if bytes < 112
 
 	next_ctr	q1
 	next_ctr	q2
@@ -824,13 +813,15 @@ ENTRY(aesbs_ctr_encrypt)
 	mov		rounds, r3
 	bl		aesbs_encrypt8
 
-	__adr		ip, 1f
-	and		lr, r5, #7
-	cmp		r5, #8
-	movgt		r4, #0
-	ldrle		r4, [sp, #40]		// load final in the last round
-	sub		ip, ip, lr, lsl #2
-	bxlt		ip			// computed goto if blocks < 8
+	adr		ip, 1f
+	sub		lr, r5, #1
+	cmp		r5, #128
+	bic		lr, lr, #15
+	ands		r4, r5, #15		// preserves C flag
+	teqcs		r5, r5			// set Z flag if not last iteration
+	sub		ip, ip, lr, lsr #2
+	rsb		r4, r4, #16
+	movcc		pc, ip			// computed goto if bytes < 128
 
 	vld1.8		{q8}, [r1]!
 	vld1.8		{q9}, [r1]!
@@ -839,46 +830,70 @@ ENTRY(aesbs_ctr_encrypt)
 	vld1.8		{q12}, [r1]!
 	vld1.8		{q13}, [r1]!
 	vld1.8		{q14}, [r1]!
-	teq		r4, #0			// skip last block if 'final'
-1:	bne		2f
+1:	subne		r1, r1, r4
 	vld1.8		{q15}, [r1]!
 
-2:	__adr		ip, 3f
-	cmp		r5, #8
-	sub		ip, ip, lr, lsl #3
-	bxlt		ip			// computed goto if blocks < 8
+	add		ip, ip, #2f - 1b
 
 	veor		q0, q0, q8
-	vst1.8		{q0}, [r0]!
 	veor		q1, q1, q9
-	vst1.8		{q1}, [r0]!
 	veor		q4, q4, q10
-	vst1.8		{q4}, [r0]!
 	veor		q6, q6, q11
-	vst1.8		{q6}, [r0]!
 	veor		q3, q3, q12
-	vst1.8		{q3}, [r0]!
 	veor		q7, q7, q13
-	vst1.8		{q7}, [r0]!
 	veor		q2, q2, q14
+	bne		3f
+	veor		q5, q5, q15
+
+	movcc		pc, ip			// computed goto if bytes < 128
+
+	vst1.8		{q0}, [r0]!
+	vst1.8		{q1}, [r0]!
+	vst1.8		{q4}, [r0]!
+	vst1.8		{q6}, [r0]!
+	vst1.8		{q3}, [r0]!
+	vst1.8		{q7}, [r0]!
 	vst1.8		{q2}, [r0]!
-	teq		r4, #0			// skip last block if 'final'
-	W(bne)		5f
-3:	veor		q5, q5, q15
+2:	subne		r0, r0, r4
 	vst1.8		{q5}, [r0]!
-4:	next_ctr	q0
+	next_ctr	q0
 
-	subs		r5, r5, #8
+	subs		r5, r5, #128
 	bgt		99b
 
 	vst1.8		{q0}, [r6]
 	pop		{r4-r10, pc}
 
-5:	vst1.8		{q5}, [r4]
-	b		4b
+3:	adr		lr, .Lpermute_table + 16
+	cmp		r5, #16			// Z flag remains cleared
+	sub		lr, lr, r4
+	vld1.8		{q8-q9}, [lr]
+	vtbl.8		d16, {q5}, d16
+	vtbl.8		d17, {q5}, d17
+	veor		q5, q8, q15
+	bcc		4f			// have to reload prev if R5 < 16
+	vtbx.8		d10, {q2}, d18
+	vtbx.8		d11, {q2}, d19
+	mov		pc, ip			// branch back to VST sequence
+
+4:	sub		r0, r0, r4
+	vshr.s8		q9, q9, #7		// create mask for VBIF
+	vld1.8		{q8}, [r0]		// reload
+	vbif		q5, q8, q9
+	vst1.8		{q5}, [r0]
+	pop		{r4-r10, pc}
 ENDPROC(aesbs_ctr_encrypt)
 
+	.align		6
+.Lpermute_table:
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+	.byte		0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
 	.macro		next_tweak, out, in, const, tmp
 	vshr.s64	\tmp, \in, #63
 	vand		\tmp, \tmp, \const
@@ -893,6 +908,7 @@ ENDPROC(aesbs_ctr_encrypt)
  * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
  *		     int blocks, u8 iv[], int reorder_last_tweak)
  */
+	.align		6
 __xts_prepare8:
 	vld1.8		{q14}, [r7]		// load iv
 	vmov.i32	d30, #0x87		// compose tweak mask vector
@@ -900,12 +916,12 @@ __xts_prepare8:
 	vshr.u64	d30, d31, #7
 	vmov		q12, q14
 
-	__adr		ip, 0f
+	adr		ip, 0f
 	and		r4, r6, #7
 	cmp		r6, #8
 	sub		ip, ip, r4, lsl #5
 	mov		r4, sp
-	bxlt		ip			// computed goto if blocks < 8
+	movlt		pc, ip			// computed goto if blocks < 8
 
 	vld1.8		{q0}, [r1]!
 	next_tweak	q12, q14, q15, q13
@@ -961,8 +977,7 @@ ENDPROC(__xts_prepare8)
 	push		{r4-r8, lr}
 	mov		r5, sp			// preserve sp
 	ldrd		r6, r7, [sp, #24]	// get blocks and iv args
-	ldr		r8, [sp, #32]		// reorder final tweak?
-	rsb		r8, r8, #1
+	rsb		r8, ip, #1
 	sub		ip, sp, #128		// make room for 8x tweak
 	bic		ip, ip, #0xf		// align sp to 16 bytes
 	mov		sp, ip
@@ -973,12 +988,12 @@ ENDPROC(__xts_prepare8)
 	mov		rounds, r3
 	bl		\do8
 
-	__adr		ip, 0f
+	adr		ip, 0f
 	and		lr, r6, #7
 	cmp		r6, #8
 	sub		ip, ip, lr, lsl #2
 	mov		r4, sp
-	bxlt		ip			// computed goto if blocks < 8
+	movlt		pc, ip			// computed goto if blocks < 8
 
 	vld1.8		{q8}, [r4, :128]!
 	vld1.8		{q9}, [r4, :128]!
@@ -989,9 +1004,9 @@ ENDPROC(__xts_prepare8)
 	vld1.8		{q14}, [r4, :128]!
 	vld1.8		{q15}, [r4, :128]
 
-0:	__adr		ip, 1f
+0:	adr		ip, 1f
 	sub		ip, ip, lr, lsl #3
-	bxlt		ip			// computed goto if blocks < 8
+	movlt		pc, ip			// computed goto if blocks < 8
 
 	veor		\o0, \o0, q8
 	vst1.8		{\o0}, [r0]!
@@ -1018,9 +1033,11 @@ ENDPROC(__xts_prepare8)
 	.endm
 
 ENTRY(aesbs_xts_encrypt)
+	mov		ip, #0			// never reorder final tweak
 	__xts_crypt	aesbs_encrypt8, q0, q1, q4, q6, q3, q7, q2, q5
 ENDPROC(aesbs_xts_encrypt)
 
 ENTRY(aesbs_xts_decrypt)
+	ldr		ip, [sp, #8]		// reorder final tweak?
 	__xts_crypt	aesbs_decrypt8, q0, q1, q6, q4, q2, q7, q3, q5
 ENDPROC(aesbs_xts_decrypt)
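
A note on the computed gotos this patch touches: throughout the file, a batch of
n < 8 blocks is handled by jumping into the middle of a straight-line run of eight
loads or stores, so that only the last n of them execute (the bitsliced code always
processes eight lanes; a short batch simply occupies the final lanes). The patch
only changes how the jump is made: a plain adr plus "mov pc, reg" branches within
the current instruction set, so the THUMB() bit-0 fixup done by the old __adr macro
is no longer needed. A rough C analogue of the dispatch, using switch fallthrough
(Duff's device); load_lanes and the state layout are illustrative, not the kernel's
API:

    #include <stdint.h>
    #include <string.h>

    /* Execute only the last n (1..8) of eight fixed 16-byte loads, as the
     * "sub ip, ip, lr, lsl #2; movlt pc, ip" sequence does in assembly. */
    static void load_lanes(uint8_t state[8][16], const uint8_t **in, int n)
    {
        const uint8_t *p = *in;

        switch (8 - n) {    /* jump n instructions back from label 0: */
        case 0: memcpy(state[0], p, 16); p += 16; /* fallthrough */
        case 1: memcpy(state[1], p, 16); p += 16; /* fallthrough */
        case 2: memcpy(state[2], p, 16); p += 16; /* fallthrough */
        case 3: memcpy(state[3], p, 16); p += 16; /* fallthrough */
        case 4: memcpy(state[4], p, 16); p += 16; /* fallthrough */
        case 5: memcpy(state[5], p, 16); p += 16; /* fallthrough */
        case 6: memcpy(state[6], p, 16); p += 16; /* fallthrough */
        case 7: memcpy(state[7], p, 16); p += 16;
        }
        *in = p;
    }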
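
The reworked next_ctr macro keeps the 128-bit counter byte-reversed in four
general-purpose registers (r7-r10) so it can be bumped with an adds/adcs carry
chain, and now moves two words per vmov instead of four single-lane inserts.
What the carry chain computes is an ordinary big-endian 128-bit increment; a
minimal C model:

    #include <stdint.h>

    /* Big-endian 128-bit counter increment: the operation the adds/adcs/adc
     * chain in next_ctr performs on the word-reversed value in r7..r10. */
    static void ctr128_inc(uint8_t ctr[16])
    {
        for (int i = 15; i >= 0; i--)
            if (++ctr[i] != 0)    /* stop at the first byte that */
                break;            /* does not wrap around to zero */
    }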
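
The main functional change is to aesbs_ctr_encrypt's interface: it now takes a
byte count rather than a block count plus a separate 'final' buffer, and handles
a trailing partial block itself via .Lpermute_table and VTBL/VTBX. Note the flag
games: "ands r4, r5, #15" updates N/Z but leaves C as set by the preceding
"cmp r5, #128", and "teqcs r5, r5" forces Z on non-final iterations, so a single
movcc/bne pair steers both the short-batch and partial-block paths. Seen from C,
the contract the assembly now implements is roughly the sketch below; this is a
first-approximation model only (the exact counter bookkeeping for the tail is
subtler), aes_encrypt_block is a hypothetical stand-in for one block cipher
invocation, and ctr128_inc is the helper above:

    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical single-block AES primitive, stand-in only. */
    void aes_encrypt_block(uint8_t out[16], const uint8_t in[16],
                           const void *expanded_key);

    static void ctr_crypt(uint8_t *out, const uint8_t *in, size_t bytes,
                          uint8_t ctr[16], const void *expanded_key)
    {
        uint8_t ks[16];

        while (bytes) {
            size_t n = bytes < 16 ? bytes : 16;   /* partial tail allowed */

            aes_encrypt_block(ks, ctr, expanded_key);
            ctr128_inc(ctr);                      /* see sketch above */
            for (size_t i = 0; i < n; i++)
                out[i] = in[i] ^ ks[i];           /* XOR keystream */
            out += n;
            in += n;
            bytes -= n;
        }
    }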
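
Finally, the next_tweak macro visible in the later hunks computes the standard
XTS tweak update: multiplication by x in GF(2^128) reduced modulo
x^128 + x^7 + x^2 + x + 1, which is where the 0x87 constant in __xts_prepare8
comes from. A byte-wise C model of that update, assuming the usual little-endian
XTS tweak layout:

    #include <stdint.h>

    /* GF(2^128) doubling of the XTS tweak: shift the 128-bit value left by
     * one bit and, on carry-out, fold 0x87 back into the low byte. */
    static void xts_next_tweak(uint8_t t[16])
    {
        unsigned carry = 0;

        for (int i = 0; i < 16; i++) {   /* little-endian bignum shift */
            unsigned c = t[i] >> 7;
            t[i] = (uint8_t)(t[i] << 1) | carry;
            carry = c;
        }
        if (carry)
            t[0] ^= 0x87;
    }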