aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
diff options
context:
space:
mode:
authorJosh Poimboeuf <jpoimboe@redhat.com>2017-09-18 14:42:02 -0500
committerHerbert Xu <herbert@gondor.apana.org.au>2017-09-20 17:42:32 +0800
commit4b15606664a2f8d7c4f0092fb0305fe1c7c65b7b (patch)
tree207a3d51441bf2376b8f084f154db23adfc98847 /arch/x86/crypto/cast5-avx-x86_64-asm_64.S
parentcrypto: x86/camellia - Fix RBP usage (diff)
downloadlinux-dev-4b15606664a2f8d7c4f0092fb0305fe1c7c65b7b.tar.xz
linux-dev-4b15606664a2f8d7c4f0092fb0305fe1c7c65b7b.zip
crypto: x86/cast5 - Fix RBP usage
Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Use R15 instead of RBP. R15 can't be used as the RID1 register because of x86 instruction encoding limitations. So use R15 for CTX and RDI for CTX. This means that CTX is no longer an implicit function argument. Instead it needs to be explicitly copied from RDI. Reported-by: Eric Biggers <ebiggers@google.com> Reported-by: Peter Zijlstra <peterz@infradead.org> Tested-by: Eric Biggers <ebiggers@google.com> Acked-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto/cast5-avx-x86_64-asm_64.S')
-rw-r--r--arch/x86/crypto/cast5-avx-x86_64-asm_64.S47
1 files changed, 30 insertions, 17 deletions
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index b4a8806234ea..86107c961bb4 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -47,7 +47,7 @@
/**********************************************************************
16-way AVX cast5
**********************************************************************/
-#define CTX %rdi
+#define CTX %r15
#define RL1 %xmm0
#define RR1 %xmm1
@@ -70,8 +70,8 @@
#define RTMP %xmm15
-#define RID1 %rbp
-#define RID1d %ebp
+#define RID1 %rdi
+#define RID1d %edi
#define RID2 %rsi
#define RID2d %esi
@@ -226,7 +226,7 @@
.align 16
__cast5_enc_blk16:
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* RL1: blocks 1 and 2
* RR1: blocks 3 and 4
* RL2: blocks 5 and 6
@@ -246,9 +246,11 @@ __cast5_enc_blk16:
* RR4: encrypted blocks 15 and 16
*/
- pushq %rbp;
+ pushq %r15;
pushq %rbx;
+ movq %rdi, CTX;
+
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
@@ -283,7 +285,7 @@ __cast5_enc_blk16:
.L__skip_enc:
popq %rbx;
- popq %rbp;
+ popq %r15;
vmovdqa .Lbswap_mask, RKM;
@@ -298,7 +300,7 @@ ENDPROC(__cast5_enc_blk16)
.align 16
__cast5_dec_blk16:
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* RL1: encrypted blocks 1 and 2
* RR1: encrypted blocks 3 and 4
* RL2: encrypted blocks 5 and 6
@@ -318,9 +320,11 @@ __cast5_dec_blk16:
* RR4: decrypted blocks 15 and 16
*/
- pushq %rbp;
+ pushq %r15;
pushq %rbx;
+ movq %rdi, CTX;
+
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
@@ -356,7 +360,7 @@ __cast5_dec_blk16:
vmovdqa .Lbswap_mask, RKM;
popq %rbx;
- popq %rbp;
+ popq %r15;
outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
@@ -372,12 +376,14 @@ ENDPROC(__cast5_dec_blk16)
ENTRY(cast5_ecb_enc_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
vmovdqu (0*4*4)(%rdx), RL1;
@@ -400,18 +406,22 @@ ENTRY(cast5_ecb_enc_16way)
vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast5_ecb_enc_16way)
ENTRY(cast5_ecb_dec_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
+ pushq %r15;
+
+ movq %rdi, CTX;
movq %rsi, %r11;
vmovdqu (0*4*4)(%rdx), RL1;
@@ -434,20 +444,22 @@ ENTRY(cast5_ecb_dec_16way)
vmovdqu RR4, (6*4*4)(%r11);
vmovdqu RL4, (7*4*4)(%r11);
+ popq %r15;
FRAME_END
ret;
ENDPROC(cast5_ecb_dec_16way)
ENTRY(cast5_cbc_dec_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
*/
FRAME_BEGIN
-
pushq %r12;
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
@@ -483,23 +495,24 @@ ENTRY(cast5_cbc_dec_16way)
vmovdqu RR4, (6*16)(%r11);
vmovdqu RL4, (7*16)(%r11);
+ popq %r15;
popq %r12;
-
FRAME_END
ret;
ENDPROC(cast5_cbc_dec_16way)
ENTRY(cast5_ctr_16way)
/* input:
- * %rdi: ctx, CTX
+ * %rdi: ctx
* %rsi: dst
* %rdx: src
* %rcx: iv (big endian, 64bit)
*/
FRAME_BEGIN
-
pushq %r12;
+ pushq %r15;
+ movq %rdi, CTX;
movq %rsi, %r11;
movq %rdx, %r12;
@@ -558,8 +571,8 @@ ENTRY(cast5_ctr_16way)
vmovdqu RR4, (6*16)(%r11);
vmovdqu RL4, (7*16)(%r11);
+ popq %r15;
popq %r12;
-
FRAME_END
ret;
ENDPROC(cast5_ctr_16way)