aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2013-04-08 21:51:00 +0300
committerHerbert Xu <herbert@gondor.apana.org.au>2013-04-25 21:01:51 +0800
commit18be45270a80ab489d9402b63e1f103428f0afde (patch)
tree4335e792dd1e612fff4838619698263dfce4344e /arch/x86/crypto/twofish-avx-x86_64-asm_64.S
parentcrypto: x86 - add more optimized XTS-mode for serpent-avx (diff)
downloadlinux-dev-18be45270a80ab489d9402b63e1f103428f0afde.tar.xz
linux-dev-18be45270a80ab489d9402b63e1f103428f0afde.zip
crypto: x86/twofish-avx - use optimized XTS code
Change twofish-avx to use the new XTS code, for smaller stack usage and small boost to performance. tcrypt results, with Intel i5-2450M: enc dec 16B 1.03x 1.02x 64B 0.91x 0.91x 256B 1.10x 1.09x 1024B 1.12x 1.11x 8192B 1.12x 1.11x Since XTS is practically always used with data blocks of size 512 bytes or more, I chose to not make use of twofish-3way for block sized smaller than 128 bytes. This causes slower result in tcrypt for 64 bytes. Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'arch/x86/crypto/twofish-avx-x86_64-asm_64.S')
-rw-r--r--arch/x86/crypto/twofish-avx-x86_64-asm_64.S48
1 files changed, 47 insertions, 1 deletions
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index 8d3e113b2c95..05058134c443 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -4,7 +4,7 @@
* Copyright (C) 2012 Johannes Goetzfried
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*
- * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -33,6 +33,8 @@
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+.Lxts_gf128mul_and_shl1_mask:
+ .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
.text
@@ -408,3 +410,47 @@ ENTRY(twofish_ctr_8way)
ret;
ENDPROC(twofish_ctr_8way)
+
+ENTRY(twofish_xts_enc_8way)
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
+ */
+
+ movq %rsi, %r11;
+
+ /* regs <= src, dst <= IVs, regs <= regs xor IVs */
+ load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
+ RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask);
+
+ call __twofish_enc_blk8;
+
+ /* dst <= regs xor IVs(in dst) */
+ store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
+
+ ret;
+ENDPROC(twofish_xts_enc_8way)
+
+ENTRY(twofish_xts_dec_8way)
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
+ */
+
+ movq %rsi, %r11;
+
+ /* regs <= src, dst <= IVs, regs <= regs xor IVs */
+ load_xts_8way(%rcx, %rdx, %rsi, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2,
+ RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask);
+
+ call __twofish_dec_blk8;
+
+ /* dst <= regs xor IVs(in dst) */
+ store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ ret;
+ENDPROC(twofish_xts_dec_8way)