aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-02-21 17:23:56 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2021-02-21 17:23:56 -0800
commit31caf8b2a847214be856f843e251fc2ed2cd1075 (patch)
tree245257387cfcae352fae27b1ca824daab55472ba /arch/x86/crypto
parentMerge tag 'tpmdd-next-v5.12-rc1-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd (diff)
parenthwrng: timeriomem - Use device-managed registration API (diff)
downloadlinux-dev-31caf8b2a847214be856f843e251fc2ed2cd1075.tar.xz
linux-dev-31caf8b2a847214be856f843e251fc2ed2cd1075.zip
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu: "API: - Restrict crypto_cipher to internal API users only. Algorithms: - Add x86 aesni acceleration for cts. - Improve x86 aesni acceleration for xts. - Remove x86 acceleration of some uncommon algorithms. - Remove RIPE-MD, Tiger and Salsa20. - Remove tnepres. - Add ARM acceleration for BLAKE2s and BLAKE2b. Drivers: - Add Keem Bay OCS HCU driver. - Add Marvell OcteonTX2 CPT PF driver. - Remove PicoXcell driver. - Remove mediatek driver" * 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (154 commits) hwrng: timeriomem - Use device-managed registration API crypto: hisilicon/qm - fix printing format issue crypto: hisilicon/qm - do not reset hardware when CE happens crypto: hisilicon/qm - update irqflag crypto: hisilicon/qm - fix the value of 'QM_SQC_VFT_BASE_MASK_V2' crypto: hisilicon/qm - fix request missing error crypto: hisilicon/qm - removing driver after reset crypto: octeontx2 - fix -Wpointer-bool-conversion warning crypto: hisilicon/hpre - enable Elliptic curve cryptography crypto: hisilicon - PASID fixed on Kunpeng 930 crypto: hisilicon/qm - fix use of 'dma_map_single' crypto: hisilicon/hpre - tiny fix crypto: hisilicon/hpre - adapt the number of clusters crypto: cpt - remove casting dma_alloc_coherent crypto: keembay-ocs-aes - Fix 'q' assignment during CCM B0 generation crypto: xor - Fix typo of optimization hwrng: optee - Use device-managed registration API crypto: arm64/crc-t10dif - move NEON yield to C code crypto: arm64/aes-ce-mac - simplify NEON yield crypto: arm64/aes-neonbs - remove NEON yield calls ...
Diffstat (limited to 'arch/x86/crypto')
-rw-r--r--arch/x86/crypto/Makefile2
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S482
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c753
-rw-r--r--arch/x86/crypto/blake2s-glue.c150
-rw-r--r--arch/x86/crypto/blowfish_glue.c107
-rw-r--r--arch/x86/crypto/camellia-aesni-avx-asm_64.S298
-rw-r--r--arch/x86/crypto/camellia-aesni-avx2-asm_64.S351
-rw-r--r--arch/x86/crypto/camellia.h67
-rw-r--r--arch/x86/crypto/camellia_aesni_avx2_glue.c198
-rw-r--r--arch/x86/crypto/camellia_aesni_avx_glue.c216
-rw-r--r--arch/x86/crypto/camellia_glue.c145
-rw-r--r--arch/x86/crypto/cast5_avx_glue.c287
-rw-r--r--arch/x86/crypto/cast6-avx-x86_64-asm_64.S84
-rw-r--r--arch/x86/crypto/cast6_avx_glue.c207
-rw-r--r--arch/x86/crypto/des3_ede_glue.c104
-rw-r--r--arch/x86/crypto/ecb_cbc_helpers.h76
-rw-r--r--arch/x86/crypto/glue_helper-asm-avx.S104
-rw-r--r--arch/x86/crypto/glue_helper-asm-avx2.S136
-rw-r--r--arch/x86/crypto/glue_helper.c381
-rw-r--r--arch/x86/crypto/serpent-avx-x86_64-asm_64.S68
-rw-r--r--arch/x86/crypto/serpent-avx.h21
-rw-r--r--arch/x86/crypto/serpent-avx2-asm_64.S87
-rw-r--r--arch/x86/crypto/serpent-sse2.h60
-rw-r--r--arch/x86/crypto/serpent_avx2_glue.c185
-rw-r--r--arch/x86/crypto/serpent_avx_glue.c215
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c150
-rw-r--r--arch/x86/crypto/twofish-avx-x86_64-asm_64.S80
-rw-r--r--arch/x86/crypto/twofish.h21
-rw-r--r--arch/x86/crypto/twofish_avx_glue.c211
-rw-r--r--arch/x86/crypto/twofish_glue_3way.c160
30 files changed, 1310 insertions, 4096 deletions
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index a31de0c6ccde..b28e36b7c96b 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -4,8 +4,6 @@
OBJECT_FILES_NON_STANDARD := y
-obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
-
obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index d1436c37008b..4e3972570916 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -43,10 +43,6 @@
#ifdef __x86_64__
# constants in mergeable sections, linker can reorder and merge
-.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
-.align 16
-.Lgf128mul_x_ble_mask:
- .octa 0x00000000000000010000000000000087
.section .rodata.cst16.POLY, "aM", @progbits, 16
.align 16
POLY: .octa 0xC2000000000000000000000000000001
@@ -146,7 +142,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
#define CTR %xmm11
#define INC %xmm12
-#define GF128MUL_MASK %xmm10
+#define GF128MUL_MASK %xmm7
#ifdef __x86_64__
#define AREG %rax
@@ -2577,13 +2573,140 @@ SYM_FUNC_START(aesni_cbc_dec)
ret
SYM_FUNC_END(aesni_cbc_dec)
-#ifdef __x86_64__
+/*
+ * void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ * size_t len, u8 *iv)
+ */
+SYM_FUNC_START(aesni_cts_cbc_enc)
+ FRAME_BEGIN
+#ifndef __x86_64__
+ pushl IVP
+ pushl LEN
+ pushl KEYP
+ pushl KLEN
+ movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
+ movl (FRAME_OFFSET+24)(%esp), OUTP # dst
+ movl (FRAME_OFFSET+28)(%esp), INP # src
+ movl (FRAME_OFFSET+32)(%esp), LEN # len
+ movl (FRAME_OFFSET+36)(%esp), IVP # iv
+ lea .Lcts_permute_table, T1
+#else
+ lea .Lcts_permute_table(%rip), T1
+#endif
+ mov 480(KEYP), KLEN
+ movups (IVP), STATE
+ sub $16, LEN
+ mov T1, IVP
+ add $32, IVP
+ add LEN, T1
+ sub LEN, IVP
+ movups (T1), %xmm4
+ movups (IVP), %xmm5
+
+ movups (INP), IN1
+ add LEN, INP
+ movups (INP), IN2
+
+ pxor IN1, STATE
+ call _aesni_enc1
+
+ pshufb %xmm5, IN2
+ pxor STATE, IN2
+ pshufb %xmm4, STATE
+ add OUTP, LEN
+ movups STATE, (LEN)
+
+ movaps IN2, STATE
+ call _aesni_enc1
+ movups STATE, (OUTP)
+
+#ifndef __x86_64__
+ popl KLEN
+ popl KEYP
+ popl LEN
+ popl IVP
+#endif
+ FRAME_END
+ ret
+SYM_FUNC_END(aesni_cts_cbc_enc)
+
+/*
+ * void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
+ * size_t len, u8 *iv)
+ */
+SYM_FUNC_START(aesni_cts_cbc_dec)
+ FRAME_BEGIN
+#ifndef __x86_64__
+ pushl IVP
+ pushl LEN
+ pushl KEYP
+ pushl KLEN
+ movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
+ movl (FRAME_OFFSET+24)(%esp), OUTP # dst
+ movl (FRAME_OFFSET+28)(%esp), INP # src
+ movl (FRAME_OFFSET+32)(%esp), LEN # len
+ movl (FRAME_OFFSET+36)(%esp), IVP # iv
+ lea .Lcts_permute_table, T1
+#else
+ lea .Lcts_permute_table(%rip), T1
+#endif
+ mov 480(KEYP), KLEN
+ add $240, KEYP
+ movups (IVP), IV
+ sub $16, LEN
+ mov T1, IVP
+ add $32, IVP
+ add LEN, T1
+ sub LEN, IVP
+ movups (T1), %xmm4
+
+ movups (INP), STATE
+ add LEN, INP
+ movups (INP), IN1
+
+ call _aesni_dec1
+ movaps STATE, IN2
+ pshufb %xmm4, STATE
+ pxor IN1, STATE
+
+ add OUTP, LEN
+ movups STATE, (LEN)
+
+ movups (IVP), %xmm0
+ pshufb %xmm0, IN1
+ pblendvb IN2, IN1
+ movaps IN1, STATE
+ call _aesni_dec1
+
+ pxor IV, STATE
+ movups STATE, (OUTP)
+
+#ifndef __x86_64__
+ popl KLEN
+ popl KEYP
+ popl LEN
+ popl IVP
+#endif
+ FRAME_END
+ ret
+SYM_FUNC_END(aesni_cts_cbc_dec)
+
.pushsection .rodata
.align 16
+.Lcts_permute_table:
+ .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
+ .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
+ .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+ .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+ .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
+ .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
+#ifdef __x86_64__
.Lbswap_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+#endif
.popsection
+#ifdef __x86_64__
/*
* _aesni_inc_init: internal ABI
* setup registers used by _aesni_inc
@@ -2696,6 +2819,14 @@ SYM_FUNC_START(aesni_ctr_enc)
ret
SYM_FUNC_END(aesni_ctr_enc)
+#endif
+
+.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
+.align 16
+.Lgf128mul_x_ble_mask:
+ .octa 0x00000000000000010000000000000087
+.previous
+
/*
* _aesni_gf128mul_x_ble: internal ABI
* Multiply in GF(2^128) for XTS IVs
@@ -2708,120 +2839,325 @@ SYM_FUNC_END(aesni_ctr_enc)
* CTR: == temporary value
*/
#define _aesni_gf128mul_x_ble() \
- pshufd $0x13, IV, CTR; \
+ pshufd $0x13, IV, KEY; \
paddq IV, IV; \
- psrad $31, CTR; \
- pand GF128MUL_MASK, CTR; \
- pxor CTR, IV;
+ psrad $31, KEY; \
+ pand GF128MUL_MASK, KEY; \
+ pxor KEY, IV;
/*
- * void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *dst,
- * const u8 *src, bool enc, le128 *iv)
+ * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
+ * const u8 *src, unsigned int len, le128 *iv)
*/
-SYM_FUNC_START(aesni_xts_crypt8)
+SYM_FUNC_START(aesni_xts_encrypt)
FRAME_BEGIN
- testb %cl, %cl
- movl $0, %ecx
- movl $240, %r10d
- leaq _aesni_enc4, %r11
- leaq _aesni_dec4, %rax
- cmovel %r10d, %ecx
- cmoveq %rax, %r11
-
+#ifndef __x86_64__
+ pushl IVP
+ pushl LEN
+ pushl KEYP
+ pushl KLEN
+ movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
+ movl (FRAME_OFFSET+24)(%esp), OUTP # dst
+ movl (FRAME_OFFSET+28)(%esp), INP # src
+ movl (FRAME_OFFSET+32)(%esp), LEN # len
+ movl (FRAME_OFFSET+36)(%esp), IVP # iv
movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
+#else
+ movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
+#endif
movups (IVP), IV
mov 480(KEYP), KLEN
- addq %rcx, KEYP
+
+.Lxts_enc_loop4:
+ sub $64, LEN
+ jl .Lxts_enc_1x
movdqa IV, STATE1
- movdqu 0x00(INP), INC
- pxor INC, STATE1
+ movdqu 0x00(INP), IN
+ pxor IN, STATE1
movdqu IV, 0x00(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE2
- movdqu 0x10(INP), INC
- pxor INC, STATE2
+ movdqu 0x10(INP), IN
+ pxor IN, STATE2
movdqu IV, 0x10(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE3
- movdqu 0x20(INP), INC
- pxor INC, STATE3
+ movdqu 0x20(INP), IN
+ pxor IN, STATE3
movdqu IV, 0x20(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE4
- movdqu 0x30(INP), INC
- pxor INC, STATE4
+ movdqu 0x30(INP), IN
+ pxor IN, STATE4
movdqu IV, 0x30(OUTP)
- CALL_NOSPEC r11
+ call _aesni_enc4
- movdqu 0x00(OUTP), INC
- pxor INC, STATE1
+ movdqu 0x00(OUTP), IN
+ pxor IN, STATE1
movdqu STATE1, 0x00(OUTP)
+ movdqu 0x10(OUTP), IN
+ pxor IN, STATE2
+ movdqu STATE2, 0x10(OUTP)
+
+ movdqu 0x20(OUTP), IN
+ pxor IN, STATE3
+ movdqu STATE3, 0x20(OUTP)
+
+ movdqu 0x30(OUTP), IN
+ pxor IN, STATE4
+ movdqu STATE4, 0x30(OUTP)
+
_aesni_gf128mul_x_ble()
- movdqa IV, STATE1
- movdqu 0x40(INP), INC
- pxor INC, STATE1
- movdqu IV, 0x40(OUTP)
- movdqu 0x10(OUTP), INC
- pxor INC, STATE2
- movdqu STATE2, 0x10(OUTP)
+ add $64, INP
+ add $64, OUTP
+ test LEN, LEN
+ jnz .Lxts_enc_loop4
+.Lxts_enc_ret_iv:
+ movups IV, (IVP)
+
+.Lxts_enc_ret:
+#ifndef __x86_64__
+ popl KLEN
+ popl KEYP
+ popl LEN
+ popl IVP
+#endif
+ FRAME_END
+ ret
+
+.Lxts_enc_1x:
+ add $64, LEN
+ jz .Lxts_enc_ret_iv
+ sub $16, LEN
+ jl .Lxts_enc_cts4
+
+.Lxts_enc_loop1:
+ movdqu (INP), STATE
+ pxor IV, STATE
+ call _aesni_enc1
+ pxor IV, STATE
_aesni_gf128mul_x_ble()
- movdqa IV, STATE2
- movdqu 0x50(INP), INC
- pxor INC, STATE2
- movdqu IV, 0x50(OUTP)
- movdqu 0x20(OUTP), INC
- pxor INC, STATE3
- movdqu STATE3, 0x20(OUTP)
+ test LEN, LEN
+ jz .Lxts_enc_out
+
+ add $16, INP
+ sub $16, LEN
+ jl .Lxts_enc_cts1
+
+ movdqu STATE, (OUTP)
+ add $16, OUTP
+ jmp .Lxts_enc_loop1
+
+.Lxts_enc_out:
+ movdqu STATE, (OUTP)
+ jmp .Lxts_enc_ret_iv
+
+.Lxts_enc_cts4:
+ movdqa STATE4, STATE
+ sub $16, OUTP
+
+.Lxts_enc_cts1:
+#ifndef __x86_64__
+ lea .Lcts_permute_table, T1
+#else
+ lea .Lcts_permute_table(%rip), T1
+#endif
+ add LEN, INP /* rewind input pointer */
+ add $16, LEN /* # bytes in final block */
+ movups (INP), IN1
+
+ mov T1, IVP
+ add $32, IVP
+ add LEN, T1
+ sub LEN, IVP
+ add OUTP, LEN
+
+ movups (T1), %xmm4
+ movaps STATE, IN2
+ pshufb %xmm4, STATE
+ movups STATE, (LEN)
+
+ movups (IVP), %xmm0
+ pshufb %xmm0, IN1
+ pblendvb IN2, IN1
+ movaps IN1, STATE
+
+ pxor IV, STATE
+ call _aesni_enc1
+ pxor IV, STATE
+
+ movups STATE, (OUTP)
+ jmp .Lxts_enc_ret
+SYM_FUNC_END(aesni_xts_encrypt)
+
+/*
+ * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
+ * const u8 *src, unsigned int len, le128 *iv)
+ */
+SYM_FUNC_START(aesni_xts_decrypt)
+ FRAME_BEGIN
+#ifndef __x86_64__
+ pushl IVP
+ pushl LEN
+ pushl KEYP
+ pushl KLEN
+ movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
+ movl (FRAME_OFFSET+24)(%esp), OUTP # dst
+ movl (FRAME_OFFSET+28)(%esp), INP # src
+ movl (FRAME_OFFSET+32)(%esp), LEN # len
+ movl (FRAME_OFFSET+36)(%esp), IVP # iv
+ movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
+#else
+ movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
+#endif
+ movups (IVP), IV
+
+ mov 480(KEYP), KLEN
+ add $240, KEYP
+
+ test $15, LEN
+ jz .Lxts_dec_loop4
+ sub $16, LEN
+
+.Lxts_dec_loop4:
+ sub $64, LEN
+ jl .Lxts_dec_1x
+
+ movdqa IV, STATE1
+ movdqu 0x00(INP), IN
+ pxor IN, STATE1
+ movdqu IV, 0x00(OUTP)
_aesni_gf128mul_x_ble()
- movdqa IV, STATE3
- movdqu 0x60(INP), INC
- pxor INC, STATE3
- movdqu IV, 0x60(OUTP)
+ movdqa IV, STATE2
+ movdqu 0x10(INP), IN
+ pxor IN, STATE2
+ movdqu IV, 0x10(OUTP)
- movdqu 0x30(OUTP), INC
- pxor INC, STATE4
- movdqu STATE4, 0x30(OUTP)
+ _aesni_gf128mul_x_ble()
+ movdqa IV, STATE3
+ movdqu 0x20(INP), IN
+ pxor IN, STATE3
+ movdqu IV, 0x20(OUTP)
_aesni_gf128mul_x_ble()
movdqa IV, STATE4
- movdqu 0x70(INP), INC
- pxor INC, STATE4
- movdqu IV, 0x70(OUTP)
+ movdqu 0x30(INP), IN
+ pxor IN, STATE4
+ movdqu IV, 0x30(OUTP)
- _aesni_gf128mul_x_ble()
- movups IV, (IVP)
+ call _aesni_dec4
+
+ movdqu 0x00(OUTP), IN
+ pxor IN, STATE1
+ movdqu STATE1, 0x00(OUTP)
+
+ movdqu 0x10(OUTP), IN
+ pxor IN, STATE2
+ movdqu STATE2, 0x10(OUTP)
- CALL_NOSPEC r11
+ movdqu 0x20(OUTP), IN
+ pxor IN, STATE3
+ movdqu STATE3, 0x20(OUTP)
- movdqu 0x40(OUTP), INC
- pxor INC, STATE1
- movdqu STATE1, 0x40(OUTP)
+ movdqu 0x30(OUTP), IN
+ pxor IN, STATE4
+ movdqu STATE4, 0x30(OUTP)
- movdqu 0x50(OUTP), INC
- pxor INC, STATE2
- movdqu STATE2, 0x50(OUTP)
+ _aesni_gf128mul_x_ble()
- movdqu 0x60(OUTP), INC
- pxor INC, STATE3
- movdqu STATE3, 0x60(OUTP)
+ add $64, INP
+ add $64, OUTP
+ test LEN, LEN
+ jnz .Lxts_dec_loop4
- movdqu 0x70(OUTP), INC
- pxor INC, STATE4
- movdqu STATE4, 0x70(OUTP)
+.Lxts_dec_ret_iv:
+ movups IV, (IVP)
+.Lxts_dec_ret:
+#ifndef __x86_64__
+ popl KLEN
+ popl KEYP
+ popl LEN
+ popl IVP
+#endif
FRAME_END
ret
-SYM_FUNC_END(aesni_xts_crypt8)
+.Lxts_dec_1x:
+ add $64, LEN
+ jz .Lxts_dec_ret_iv
+
+.Lxts_dec_loop1:
+ movdqu (INP), STATE
+
+ add $16, INP
+ sub $16, LEN
+ jl .Lxts_dec_cts1
+
+ pxor IV, STATE
+ call _aesni_dec1
+ pxor IV, STATE
+ _aesni_gf128mul_x_ble()
+
+ test LEN, LEN
+ jz .Lxts_dec_out
+
+ movdqu STATE, (OUTP)
+ add $16, OUTP
+ jmp .Lxts_dec_loop1
+
+.Lxts_dec_out:
+ movdqu STATE, (OUTP)
+ jmp .Lxts_dec_ret_iv
+
+.Lxts_dec_cts1:
+ movdqa IV, STATE4
+ _aesni_gf128mul_x_ble()
+
+ pxor IV, STATE
+ call _aesni_dec1
+ pxor IV, STATE
+
+#ifndef __x86_64__
+ lea .Lcts_permute_table, T1
+#else
+ lea .Lcts_permute_table(%rip), T1
#endif
+ add LEN, INP /* rewind input pointer */
+ add $16, LEN /* # bytes in final block */
+ movups (INP), IN1
+
+ mov T1, IVP
+ add $32, IVP
+ add LEN, T1
+ sub LEN, IVP
+ add OUTP, LEN
+
+ movups (T1), %xmm4
+ movaps STATE, IN2
+ pshufb %xmm4, STATE
+ movups STATE, (LEN)
+
+ movups (IVP), %xmm0
+ pshufb %xmm0, IN1
+ pblendvb IN2, IN1
+ movaps IN1, STATE
+
+ pxor STATE4, STATE
+ call _aesni_dec1
+ pxor STATE4, STATE
+
+ movups STATE, (OUTP)
+ jmp .Lxts_dec_ret
+SYM_FUNC_END(aesni_xts_decrypt)
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index ad8a7188a2bf..2144e54a6c89 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -31,11 +31,10 @@
#include <crypto/internal/aead.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <linux/jump_label.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
-#ifdef CONFIG_X86_64
-#include <asm/crypto/glue_helper.h>
-#endif
+#include <linux/static_call.h>
#define AESNI_ALIGN 16
@@ -93,62 +92,25 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
+asmlinkage void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in, unsigned int len, u8 *iv);
+asmlinkage void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in, unsigned int len, u8 *iv);
#define AVX_GEN2_OPTSIZE 640
#define AVX_GEN4_OPTSIZE 4096
+asmlinkage void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in, unsigned int len, u8 *iv);
+
+asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out,
+ const u8 *in, unsigned int len, u8 *iv);
+
#ifdef CONFIG_X86_64
-static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out,
- const u8 *in, unsigned int len, u8 *iv);
asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv);
-
-asmlinkage void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *out,
- const u8 *in, bool enc, le128 *iv);
-
-/* asmlinkage void aesni_gcm_enc()
- * void *ctx, AES Key schedule. Starts on a 16 byte boundary.
- * struct gcm_context_data. May be uninitialized.
- * u8 *out, Ciphertext output. Encrypt in-place is allowed.
- * const u8 *in, Plaintext input
- * unsigned long plaintext_len, Length of data in bytes for encryption.
- * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001.
- * 16-byte aligned pointer.
- * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
- * const u8 *aad, Additional Authentication Data (AAD)
- * unsigned long aad_len, Length of AAD in bytes.
- * u8 *auth_tag, Authenticated Tag output.
- * unsigned long auth_tag_len), Authenticated Tag Length in bytes.
- * Valid values are 16 (most likely), 12 or 8.
- */
-asmlinkage void aesni_gcm_enc(void *ctx,
- struct gcm_context_data *gdata, u8 *out,
- const u8 *in, unsigned long plaintext_len, u8 *iv,
- u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
- u8 *auth_tag, unsigned long auth_tag_len);
-
-/* asmlinkage void aesni_gcm_dec()
- * void *ctx, AES Key schedule. Starts on a 16 byte boundary.
- * struct gcm_context_data. May be uninitialized.
- * u8 *out, Plaintext output. Decrypt in-place is allowed.
- * const u8 *in, Ciphertext input
- * unsigned long ciphertext_len, Length of data in bytes for decryption.
- * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001.
- * 16-byte aligned pointer.
- * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
- * const u8 *aad, Additional Authentication Data (AAD)
- * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going
- * to be 8 or 12 bytes
- * u8 *auth_tag, Authenticated Tag output.
- * unsigned long auth_tag_len) Authenticated Tag Length in bytes.
- * Valid values are 16 (most likely), 12 or 8.
- */
-asmlinkage void aesni_gcm_dec(void *ctx,
- struct gcm_context_data *gdata, u8 *out,
- const u8 *in, unsigned long ciphertext_len, u8 *iv,
- u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
- u8 *auth_tag, unsigned long auth_tag_len);
+DEFINE_STATIC_CALL(aesni_ctr_enc_tfm, aesni_ctr_enc);
/* Scatter / Gather routines, with args similar to above */
asmlinkage void aesni_gcm_init(void *ctx,
@@ -167,24 +129,6 @@ asmlinkage void aesni_gcm_finalize(void *ctx,
struct gcm_context_data *gdata,
u8 *auth_tag, unsigned long auth_tag_len);
-static const struct aesni_gcm_tfm_s {
- void (*init)(void *ctx, struct gcm_context_data *gdata, u8 *iv,
- u8 *hash_subkey, const u8 *aad, unsigned long aad_len);
- void (*enc_update)(void *ctx, struct gcm_context_data *gdata, u8 *out,
- const u8 *in, unsigned long plaintext_len);
- void (*dec_update)(void *ctx, struct gcm_context_data *gdata, u8 *out,
- const u8 *in, unsigned long ciphertext_len);
- void (*finalize)(void *ctx, struct gcm_context_data *gdata,
- u8 *auth_tag, unsigned long auth_tag_len);
-} *aesni_gcm_tfm;
-
-static const struct aesni_gcm_tfm_s aesni_gcm_tfm_sse = {
- .init = &aesni_gcm_init,
- .enc_update = &aesni_gcm_enc_update,
- .dec_update = &aesni_gcm_dec_update,
- .finalize = &aesni_gcm_finalize,
-};
-
asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
void *keys, u8 *out, unsigned int num_bytes);
asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
@@ -214,25 +158,6 @@ asmlinkage void aesni_gcm_finalize_avx_gen2(void *ctx,
struct gcm_context_data *gdata,
u8 *auth_tag, unsigned long auth_tag_len);
-asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx,
- struct gcm_context_data *gdata, u8 *out,
- const u8 *in, unsigned long plaintext_len, u8 *iv,
- const u8 *aad, unsigned long aad_len,
- u8 *auth_tag, unsigned long auth_tag_len);
-
-asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx,
- struct gcm_context_data *gdata, u8 *out,
- const u8 *in, unsigned long ciphertext_len, u8 *iv,
- const u8 *aad, unsigned long aad_len,
- u8 *auth_tag, unsigned long auth_tag_len);
-
-static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = {
- .init = &aesni_gcm_init_avx_gen2,
- .enc_update = &aesni_gcm_enc_update_avx_gen2,
- .dec_update = &aesni_gcm_dec_update_avx_gen2,
- .finalize = &aesni_gcm_finalize_avx_gen2,
-};
-
/*
* asmlinkage void aesni_gcm_init_avx_gen4()
* gcm_data *my_ctx_data, context data
@@ -256,24 +181,8 @@ asmlinkage void aesni_gcm_finalize_avx_gen4(void *ctx,
struct gcm_context_data *gdata,
u8 *auth_tag, unsigned long auth_tag_len);
-asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx,
- struct gcm_context_data *gdata, u8 *out,
- const u8 *in, unsigned long plaintext_len, u8 *iv,
- const u8 *aad, unsigned long aad_len,
- u8 *auth_tag, unsigned long auth_tag_len);
-
-asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx,
- struct gcm_context_data *gdata, u8 *out,
- const u8 *in, unsigned long ciphertext_len, u8 *iv,
- const u8 *aad, unsigned long aad_len,
- u8 *auth_tag, unsigned long auth_tag_len);
-
-static const struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = {
- .init = &aesni_gcm_init_avx_gen4,
- .enc_update = &aesni_gcm_enc_update_avx_gen4,
- .dec_update = &aesni_gcm_dec_update_avx_gen4,
- .finalize = &aesni_gcm_finalize_avx_gen4,
-};
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(gcm_use_avx2);
static inline struct
aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
@@ -374,16 +283,16 @@ static int ecb_encrypt(struct skcipher_request *req)
unsigned int nbytes;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_ecb_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -396,16 +305,16 @@ static int ecb_decrypt(struct skcipher_request *req)
unsigned int nbytes;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_ecb_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -418,16 +327,16 @@ static int cbc_encrypt(struct skcipher_request *req)
unsigned int nbytes;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
- kernel_fpu_end();
return err;
}
@@ -440,36 +349,133 @@ static int cbc_decrypt(struct skcipher_request *req)
unsigned int nbytes;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_fpu_begin();
while ((nbytes = walk.nbytes)) {
+ kernel_fpu_begin();
aesni_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
nbytes & AES_BLOCK_MASK, walk.iv);
+ kernel_fpu_end();
nbytes &= AES_BLOCK_SIZE - 1;
err = skcipher_walk_done(&walk, nbytes);
}
- kernel_fpu_end();
return err;
}
-#ifdef CONFIG_X86_64
-static void ctr_crypt_final(struct crypto_aes_ctx *ctx,
- struct skcipher_walk *walk)
+static int cts_cbc_encrypt(struct skcipher_request *req)
{
- u8 *ctrblk = walk->iv;
- u8 keystream[AES_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+ int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+ struct scatterlist *src = req->src, *dst = req->dst;
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
+ struct skcipher_walk walk;
+ int err;
+
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
+ NULL, NULL);
+
+ if (req->cryptlen <= AES_BLOCK_SIZE) {
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+ cbc_blocks = 1;
+ }
+
+ if (cbc_blocks > 0) {
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
+
+ err = cbc_encrypt(&subreq);
+ if (err)
+ return err;
+
+ if (req->cryptlen == AES_BLOCK_SIZE)
+ return 0;
+
+ dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst,
+ subreq.cryptlen);
+ }
+
+ /* handle ciphertext stealing */
+ skcipher_request_set_crypt(&subreq, src, dst,
+ req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &subreq, false);
+ if (err)
+ return err;
+
+ kernel_fpu_begin();
+ aesni_cts_cbc_enc(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes, walk.iv);
+ kernel_fpu_end();
+
+ return skcipher_walk_done(&walk, 0);
+}
+
+static int cts_cbc_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+ int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+ struct scatterlist *src = req->src, *dst = req->dst;
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct skcipher_request subreq;
+ struct skcipher_walk walk;
+ int err;
+
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
+ NULL, NULL);
+
+ if (req->cryptlen <= AES_BLOCK_SIZE) {
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+ cbc_blocks = 1;
+ }
+
+ if (cbc_blocks > 0) {
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
+
+ err = cbc_decrypt(&subreq);
+ if (err)
+ return err;
+
+ if (req->cryptlen == AES_BLOCK_SIZE)
+ return 0;
+
+ dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst,
+ subreq.cryptlen);
+ }
+
+ /* handle ciphertext stealing */
+ skcipher_request_set_crypt(&subreq, src, dst,
+ req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+ req->iv);
- aesni_enc(ctx, keystream, ctrblk);
- crypto_xor_cpy(dst, keystream, src, nbytes);
+ err = skcipher_walk_virt(&walk, &subreq, false);
+ if (err)
+ return err;
+
+ kernel_fpu_begin();
+ aesni_cts_cbc_dec(ctx, walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes, walk.iv);
+ kernel_fpu_end();
- crypto_inc(ctrblk, AES_BLOCK_SIZE);
+ return skcipher_walk_done(&walk, 0);
}
+#ifdef CONFIG_X86_64
static void aesni_ctr_enc_avx_tfm(struct crypto_aes_ctx *ctx, u8 *out,
const u8 *in, unsigned int len, u8 *iv)
{
@@ -491,120 +497,36 @@ static int ctr_crypt(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = aes_ctx(crypto_skcipher_ctx(tfm));
+ u8 keystream[AES_BLOCK_SIZE];
struct skcipher_walk walk;
unsigned int nbytes;
int err;
- err = skcipher_walk_virt(&walk, req, true);
+ err = skcipher_walk_virt(&walk, req, false);
- kernel_fpu_begin();
- while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
- aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes & AES_BLOCK_MASK, walk.iv);
- nbytes &= AES_BLOCK_SIZE - 1;
+ while ((nbytes = walk.nbytes) > 0) {
+ kernel_fpu_begin();
+ if (nbytes & AES_BLOCK_MASK)
+ static_call(aesni_ctr_enc_tfm)(ctx, walk.dst.virt.addr,
+ walk.src.virt.addr,
+ nbytes & AES_BLOCK_MASK,
+ walk.iv);
+ nbytes &= ~AES_BLOCK_MASK;
+
+ if (walk.nbytes == walk.total && nbytes > 0) {
+ aesni_enc(ctx, keystream, walk.iv);
+ crypto_xor_cpy(walk.dst.virt.addr + walk.nbytes - nbytes,
+ walk.src.virt.addr + walk.nbytes - nbytes,
+ keystream, nbytes);
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ nbytes = 0;
+ }
+ kernel_fpu_end();
err = skcipher_walk_done(&walk, nbytes);
}
- if (walk.nbytes) {
- ctr_crypt_final(ctx, &walk);
- err = skcipher_walk_done(&walk, 0);
- }
- kernel_fpu_end();
-
return err;
}
-static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- int err;
-
- err = xts_verify_key(tfm, key, keylen);
- if (err)
- return err;
-
- keylen /= 2;
-
- /* first half of xts-key is for crypt */
- err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx,
- key, keylen);
- if (err)
- return err;
-
- /* second half of xts-key is for tweak */
- return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx,
- key + keylen, keylen);
-}
-
-
-static void aesni_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
-{
- glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_enc);
-}
-
-static void aesni_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
-{
- glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec);
-}
-
-static void aesni_xts_enc8(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
-{
- aesni_xts_crypt8(ctx, dst, src, true, iv);
-}
-
-static void aesni_xts_dec8(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
-{
- aesni_xts_crypt8(ctx, dst, src, false, iv);
-}
-
-static const struct common_glue_ctx aesni_enc_xts = {
- .num_funcs = 2,
- .fpu_blocks_limit = 1,
-
- .funcs = { {
- .num_blocks = 8,
- .fn_u = { .xts = aesni_xts_enc8 }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = aesni_xts_enc }
- } }
-};
-
-static const struct common_glue_ctx aesni_dec_xts = {
- .num_funcs = 2,
- .fpu_blocks_limit = 1,
-
- .funcs = { {
- .num_blocks = 8,
- .fn_u = { .xts = aesni_xts_dec8 }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = aesni_xts_dec }
- } }
-};
-
-static int xts_encrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return glue_xts_req_128bit(&aesni_enc_xts, req, aesni_enc,
- aes_ctx(ctx->raw_tweak_ctx),
- aes_ctx(ctx->raw_crypt_ctx),
- false);
-}
-
-static int xts_decrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return glue_xts_req_128bit(&aesni_dec_xts, req, aesni_enc,
- aes_ctx(ctx->raw_tweak_ctx),
- aes_ctx(ctx->raw_crypt_ctx),
- true);
-}
-
static int
rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
{
@@ -681,42 +603,35 @@ static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
unsigned int assoclen, u8 *hash_subkey,
- u8 *iv, void *aes_ctx)
+ u8 *iv, void *aes_ctx, u8 *auth_tag,
+ unsigned long auth_tag_len)
{
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- unsigned long auth_tag_len = crypto_aead_authsize(tfm);
- const struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm;
- struct gcm_context_data data AESNI_ALIGN_ATTR;
- struct scatter_walk dst_sg_walk = {};
+ u8 databuf[sizeof(struct gcm_context_data) + (AESNI_ALIGN - 8)] __aligned(8);
+ struct gcm_context_data *data = PTR_ALIGN((void *)databuf, AESNI_ALIGN);
unsigned long left = req->cryptlen;
- unsigned long len, srclen, dstlen;
struct scatter_walk assoc_sg_walk;
- struct scatter_walk src_sg_walk;
- struct scatterlist src_start[2];
- struct scatterlist dst_start[2];
- struct scatterlist *src_sg;
- struct scatterlist *dst_sg;
- u8 *src, *dst, *assoc;
+ struct skcipher_walk walk;
+ bool do_avx, do_avx2;
u8 *assocmem = NULL;
- u8 authTag[16];
+ u8 *assoc;
+ int err;
if (!enc)
left -= auth_tag_len;
- if (left < AVX_GEN4_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen4)
- gcm_tfm = &aesni_gcm_tfm_avx_gen2;
- if (left < AVX_GEN2_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen2)
- gcm_tfm = &aesni_gcm_tfm_sse;
+ do_avx = (left >= AVX_GEN2_OPTSIZE);
+ do_avx2 = (left >= AVX_GEN4_OPTSIZE);
/* Linearize assoc, if not already linear */
- if (req->src->length >= assoclen && req->src->length &&
- (!PageHighMem(sg_page(req->src)) ||
- req->src->offset + req->src->length <= PAGE_SIZE)) {
+ if (req->src->length >= assoclen && req->src->length) {
scatterwalk_start(&assoc_sg_walk, req->src);
assoc = scatterwalk_map(&assoc_sg_walk);
} else {
+ gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+ GFP_KERNEL : GFP_ATOMIC;
+
/* assoc can be any length, so must be on heap */
- assocmem = kmalloc(assoclen, GFP_ATOMIC);
+ assocmem = kmalloc(assoclen, flags);
if (unlikely(!assocmem))
return -ENOMEM;
assoc = assocmem;
@@ -724,62 +639,15 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
}
- if (left) {
- src_sg = scatterwalk_ffwd(src_start, req->src, req->assoclen);
- scatterwalk_start(&src_sg_walk, src_sg);
- if (req->src != req->dst) {
- dst_sg = scatterwalk_ffwd(dst_start, req->dst,
- req->assoclen);
- scatterwalk_start(&dst_sg_walk, dst_sg);
- }
- }
-
kernel_fpu_begin();
- gcm_tfm->init(aes_ctx, &data, iv,
- hash_subkey, assoc, assoclen);
- if (req->src != req->dst) {
- while (left) {
- src = scatterwalk_map(&src_sg_walk);
- dst = scatterwalk_map(&dst_sg_walk);
- srclen = scatterwalk_clamp(&src_sg_walk, left);
- dstlen = scatterwalk_clamp(&dst_sg_walk, left);
- len = min(srclen, dstlen);
- if (len) {
- if (enc)
- gcm_tfm->enc_update(aes_ctx, &data,
- dst, src, len);
- else
- gcm_tfm->dec_update(aes_ctx, &data,
- dst, src, len);
- }
- left -= len;
-
- scatterwalk_unmap(src);
- scatterwalk_unmap(dst);
- scatterwalk_advance(&src_sg_walk, len);
- scatterwalk_advance(&dst_sg_walk, len);
- scatterwalk_done(&src_sg_walk, 0, left);
- scatterwalk_done(&dst_sg_walk, 1, left);
- }
- } else {
- while (left) {
- dst = src = scatterwalk_map(&src_sg_walk);
- len = scatterwalk_clamp(&src_sg_walk, left);
- if (len) {
- if (enc)
- gcm_tfm->enc_update(aes_ctx, &data,
- src, src, len);
- else
- gcm_tfm->dec_update(aes_ctx, &data,
- src, src, len);
- }
- left -= len;
- scatterwalk_unmap(src);
- scatterwalk_advance(&src_sg_walk, len);
- scatterwalk_done(&src_sg_walk, 1, left);
- }
- }
- gcm_tfm->finalize(aes_ctx, &data, authTag, auth_tag_len);
+ if (static_branch_likely(&gcm_use_avx2) && do_avx2)
+ aesni_gcm_init_avx_gen4(aes_ctx, data, iv, hash_subkey, assoc,
+ assoclen);
+ else if (static_branch_likely(&gcm_use_avx) && do_avx)
+ aesni_gcm_init_avx_gen2(aes_ctx, data, iv, hash_subkey, assoc,
+ assoclen);
+ else
+ aesni_gcm_init(aes_ctx, data, iv, hash_subkey, assoc, assoclen);
kernel_fpu_end();
if (!assocmem)
@@ -787,24 +655,58 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
else
kfree(assocmem);
- if (!enc) {
- u8 authTagMsg[16];
+ err = enc ? skcipher_walk_aead_encrypt(&walk, req, false)
+ : skcipher_walk_aead_decrypt(&walk, req, false);
- /* Copy out original authTag */
- scatterwalk_map_and_copy(authTagMsg, req->src,
- req->assoclen + req->cryptlen -
- auth_tag_len,
- auth_tag_len, 0);
+ while (walk.nbytes > 0) {
+ kernel_fpu_begin();
+ if (static_branch_likely(&gcm_use_avx2) && do_avx2) {
+ if (enc)
+ aesni_gcm_enc_update_avx_gen4(aes_ctx, data,
+ walk.dst.virt.addr,
+ walk.src.virt.addr,
+ walk.nbytes);
+ else
+ aesni_gcm_dec_update_avx_gen4(aes_ctx, data,
+ walk.dst.virt.addr,
+ walk.src.virt.addr,
+ walk.nbytes);
+ } else if (static_branch_likely(&gcm_use_avx) && do_avx) {
+ if (enc)
+ aesni_gcm_enc_update_avx_gen2(aes_ctx, data,
+ walk.dst.virt.addr,
+ walk.src.virt.addr,
+ walk.nbytes);
+ else
+ aesni_gcm_dec_update_avx_gen2(aes_ctx, data,
+ walk.dst.virt.addr,
+ walk.src.virt.addr,
+ walk.nbytes);
+ } else if (enc) {
+ aesni_gcm_enc_update(aes_ctx, data, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.nbytes);
+ } else {
+ aesni_gcm_dec_update(aes_ctx, data, walk.dst.virt.addr,
+ walk.src.virt.addr, walk.nbytes);
+ }
+ kernel_fpu_end();
- /* Compare generated tag with passed in tag. */
- return crypto_memneq(authTagMsg, authTag, auth_tag_len) ?
- -EBADMSG : 0;
+ err = skcipher_walk_done(&walk, 0);
}
- /* Copy in the authTag */
- scatterwalk_map_and_copy(authTag, req->dst,
- req->assoclen + req->cryptlen,
- auth_tag_len, 1);
+ if (err)
+ return err;
+
+ kernel_fpu_begin();
+ if (static_branch_likely(&gcm_use_avx2) && do_avx2)
+ aesni_gcm_finalize_avx_gen4(aes_ctx, data, auth_tag,
+ auth_tag_len);
+ else if (static_branch_likely(&gcm_use_avx) && do_avx)
+ aesni_gcm_finalize_avx_gen2(aes_ctx, data, auth_tag,
+ auth_tag_len);
+ else
+ aesni_gcm_finalize(aes_ctx, data, auth_tag, auth_tag_len);
+ kernel_fpu_end();
return 0;
}
@@ -812,15 +714,47 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
u8 *hash_subkey, u8 *iv, void *aes_ctx)
{
- return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv,
- aes_ctx);
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ unsigned long auth_tag_len = crypto_aead_authsize(tfm);
+ u8 auth_tag[16];
+ int err;
+
+ err = gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv, aes_ctx,
+ auth_tag, auth_tag_len);
+ if (err)
+ return err;
+
+ scatterwalk_map_and_copy(auth_tag, req->dst,
+ req->assoclen + req->cryptlen,
+ auth_tag_len, 1);
+ return 0;
}
static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
u8 *hash_subkey, u8 *iv, void *aes_ctx)
{
- return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv,
- aes_ctx);
+ struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+ unsigned long auth_tag_len = crypto_aead_authsize(tfm);
+ u8 auth_tag_msg[16];
+ u8 auth_tag[16];
+ int err;
+
+ err = gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv, aes_ctx,
+ auth_tag, auth_tag_len);
+ if (err)
+ return err;
+
+ /* Copy out original auth_tag */
+ scatterwalk_map_and_copy(auth_tag_msg, req->src,
+ req->assoclen + req->cryptlen - auth_tag_len,
+ auth_tag_len, 0);
+
+ /* Compare generated tag with passed in tag. */
+ if (crypto_memneq(auth_tag_msg, auth_tag, auth_tag_len)) {
+ memzero_explicit(auth_tag, sizeof(auth_tag));
+ return -EBADMSG;
+ }
+ return 0;
}
static int helper_rfc4106_encrypt(struct aead_request *req)
@@ -828,7 +762,8 @@ static int helper_rfc4106_encrypt(struct aead_request *req)
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
void *aes_ctx = &(ctx->aes_key_expanded);
- u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+ u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
+ u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
unsigned int i;
__be32 counter = cpu_to_be32(1);
@@ -855,7 +790,8 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
void *aes_ctx = &(ctx->aes_key_expanded);
- u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+ u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
+ u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
unsigned int i;
if (unlikely(req->assoclen != 16 && req->assoclen != 20))
@@ -877,6 +813,128 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
}
#endif
+static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int err;
+
+ err = xts_verify_key(tfm, key, keylen);
+ if (err)
+ return err;
+
+ keylen /= 2;
+
+ /* first half of xts-key is for crypt */
+ err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx,
+ key, keylen);
+ if (err)
+ return err;
+
+ /* second half of xts-key is for tweak */
+ return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx,
+ key + keylen, keylen);
+}
+
+static int xts_crypt(struct skcipher_request *req, bool encrypt)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct aesni_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ struct skcipher_request subreq;
+ struct skcipher_walk walk;
+ int err;
+
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+ int blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+
+ skcipher_walk_abort(&walk);
+
+ skcipher_request_set_tfm(&subreq, tfm);
+ skcipher_request_set_callback(&subreq,
+ skcipher_request_flags(req),
+ NULL, NULL);
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ blocks * AES_BLOCK_SIZE, req->iv);
+ req = &subreq;
+ err = skcipher_walk_virt(&walk, req, false);
+ } else {
+ tail = 0;
+ }
+
+ kernel_fpu_begin();
+
+ /* calculate first value of T */
+ aesni_enc(aes_ctx(ctx->raw_tweak_ctx), walk.iv, walk.iv);
+
+ while (walk.nbytes > 0) {
+ int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes &= ~(AES_BLOCK_SIZE - 1);
+
+ if (encrypt)
+ aesni_xts_encrypt(aes_ctx(ctx->raw_crypt_ctx),
+ walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes, walk.iv);
+ else
+ aesni_xts_decrypt(aes_ctx(ctx->raw_crypt_ctx),
+ walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes, walk.iv);
+ kernel_fpu_end();
+
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+
+ if (walk.nbytes > 0)
+ kernel_fpu_begin();
+ }
+
+ if (unlikely(tail > 0 && !err)) {
+ struct scatterlist sg_src[2], sg_dst[2];
+ struct scatterlist *src, *dst;
+
+ dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+ if (req->dst != req->src)
+ dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+ skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+ req->iv);
+
+ err = skcipher_walk_virt(&walk, &subreq, false);
+ if (err)
+ return err;
+
+ kernel_fpu_begin();
+ if (encrypt)
+ aesni_xts_encrypt(aes_ctx(ctx->raw_crypt_ctx),
+ walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes, walk.iv);
+ else
+ aesni_xts_decrypt(aes_ctx(ctx->raw_crypt_ctx),
+ walk.dst.virt.addr, walk.src.virt.addr,
+ walk.nbytes, walk.iv);
+ kernel_fpu_end();
+
+ err = skcipher_walk_done(&walk, 0);
+ }
+ return err;
+}
+
+static int xts_encrypt(struct skcipher_request *req)
+{
+ return xts_crypt(req, true);
+}
+
+static int xts_decrypt(struct skcipher_request *req)
+{
+ return xts_crypt(req, false);
+}
+
static struct crypto_alg aesni_cipher_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-aesni",
@@ -928,6 +986,23 @@ static struct skcipher_alg aesni_skciphers[] = {
.setkey = aesni_skcipher_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
+ }, {
+ .base = {
+ .cra_name = "__cts(cbc(aes))",
+ .cra_driver_name = "__cts-cbc-aes-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = CRYPTO_AES_CTX_SIZE,
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .walksize = 2 * AES_BLOCK_SIZE,
+ .setkey = aesni_skcipher_setkey,
+ .encrypt = cts_cbc_encrypt,
+ .decrypt = cts_cbc_decrypt,
#ifdef CONFIG_X86_64
}, {
.base = {
@@ -946,6 +1021,7 @@ static struct skcipher_alg aesni_skciphers[] = {
.setkey = aesni_skcipher_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
+#endif
}, {
.base = {
.cra_name = "__xts(aes)",
@@ -959,10 +1035,10 @@ static struct skcipher_alg aesni_skciphers[] = {
.min_keysize = 2 * AES_MIN_KEY_SIZE,
.max_keysize = 2 * AES_MAX_KEY_SIZE,
.ivsize = AES_BLOCK_SIZE,
+ .walksize = 2 * AES_BLOCK_SIZE,
.setkey = xts_aesni_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
-#endif
}
};
@@ -985,7 +1061,8 @@ static int generic_gcmaes_encrypt(struct aead_request *req)
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
void *aes_ctx = &(ctx->aes_key_expanded);
- u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+ u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
+ u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
__be32 counter = cpu_to_be32(1);
memcpy(iv, req->iv, 12);
@@ -1001,7 +1078,8 @@ static int generic_gcmaes_decrypt(struct aead_request *req)
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
void *aes_ctx = &(ctx->aes_key_expanded);
- u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+ u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8);
+ u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN);
memcpy(iv, req->iv, 12);
*((__be32 *)(iv+12)) = counter;
@@ -1066,19 +1144,18 @@ static int __init aesni_init(void)
#ifdef CONFIG_X86_64
if (boot_cpu_has(X86_FEATURE_AVX2)) {
pr_info("AVX2 version of gcm_enc/dec engaged.\n");
- aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen4;
+ static_branch_enable(&gcm_use_avx);
+ static_branch_enable(&gcm_use_avx2);
} else
if (boot_cpu_has(X86_FEATURE_AVX)) {
pr_info("AVX version of gcm_enc/dec engaged.\n");
- aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen2;
+ static_branch_enable(&gcm_use_avx);
} else {
pr_info("SSE version of gcm_enc/dec engaged.\n");
- aesni_gcm_tfm = &aesni_gcm_tfm_sse;
}
- aesni_ctr_enc_tfm = aesni_ctr_enc;
if (boot_cpu_has(X86_FEATURE_AVX)) {
/* optimize performance of ctr mode encryption transform */
- aesni_ctr_enc_tfm = aesni_ctr_enc_avx_tfm;
+ static_call_update(aesni_ctr_enc_tfm, aesni_ctr_enc_avx_tfm);
pr_info("AES CTR mode by8 optimization enabled\n");
}
#endif
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
index c025a01cf708..a40365ab301e 100644
--- a/arch/x86/crypto/blake2s-glue.c
+++ b/arch/x86/crypto/blake2s-glue.c
@@ -58,138 +58,40 @@ void blake2s_compress_arch(struct blake2s_state *state,
}
EXPORT_SYMBOL(blake2s_compress_arch);
-static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key,
- unsigned int keylen)
+static int crypto_blake2s_update_x86(struct shash_desc *desc,
+ const u8 *in, unsigned int inlen)
{
- struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
-
- if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE)
- return -EINVAL;
-
- memcpy(tctx->key, key, keylen);
- tctx->keylen = keylen;
-
- return 0;
+ return crypto_blake2s_update(desc, in, inlen, blake2s_compress_arch);
}
-static int crypto_blake2s_init(struct shash_desc *desc)
+static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out)
{
- struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
- struct blake2s_state *state = shash_desc_ctx(desc);
- const int outlen = crypto_shash_digestsize(desc->tfm);
-
- if (tctx->keylen)
- blake2s_init_key(state, outlen, tctx->key, tctx->keylen);
- else
- blake2s_init(state, outlen);
-
- return 0;
+ return crypto_blake2s_final(desc, out, blake2s_compress_arch);
}
-static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in,
- unsigned int inlen)
-{
- struct blake2s_state *state = shash_desc_ctx(desc);
- const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
-
- if (unlikely(!inlen))
- return 0;
- if (inlen > fill) {
- memcpy(state->buf + state->buflen, in, fill);
- blake2s_compress_arch(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
- state->buflen = 0;
- in += fill;
- inlen -= fill;
+#define BLAKE2S_ALG(name, driver_name, digest_size) \
+ { \
+ .base.cra_name = name, \
+ .base.cra_driver_name = driver_name, \
+ .base.cra_priority = 200, \
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \
+ .base.cra_module = THIS_MODULE, \
+ .digestsize = digest_size, \
+ .setkey = crypto_blake2s_setkey, \
+ .init = crypto_blake2s_init, \
+ .update = crypto_blake2s_update_x86, \
+ .final = crypto_blake2s_final_x86, \
+ .descsize = sizeof(struct blake2s_state), \
}
- if (inlen > BLAKE2S_BLOCK_SIZE) {
- const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
- /* Hash one less (full) block than strictly possible */
- blake2s_compress_arch(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
- in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
- inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
- }
- memcpy(state->buf + state->buflen, in, inlen);
- state->buflen += inlen;
-
- return 0;
-}
-
-static int crypto_blake2s_final(struct shash_desc *desc, u8 *out)
-{
- struct blake2s_state *state = shash_desc_ctx(desc);
-
- blake2s_set_lastblock(state);
- memset(state->buf + state->buflen, 0,
- BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
- blake2s_compress_arch(state, state->buf, 1, state->buflen);
- cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
- memcpy(out, state->h, state->outlen);
- memzero_explicit(state, sizeof(*state));
-
- return 0;
-}
-static struct shash_alg blake2s_algs[] = {{
- .base.cra_name = "blake2s-128",
- .base.cra_driver_name = "blake2s-128-x86",
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
- .base.cra_priority = 200,
- .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
-
- .digestsize = BLAKE2S_128_HASH_SIZE,
- .setkey = crypto_blake2s_setkey,
- .init = crypto_blake2s_init,
- .update = crypto_blake2s_update,
- .final = crypto_blake2s_final,
- .descsize = sizeof(struct blake2s_state),
-}, {
- .base.cra_name = "blake2s-160",
- .base.cra_driver_name = "blake2s-160-x86",
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
- .base.cra_priority = 200,
- .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
-
- .digestsize = BLAKE2S_160_HASH_SIZE,
- .setkey = crypto_blake2s_setkey,
- .init = crypto_blake2s_init,
- .update = crypto_blake2s_update,
- .final = crypto_blake2s_final,
- .descsize = sizeof(struct blake2s_state),
-}, {
- .base.cra_name = "blake2s-224",
- .base.cra_driver_name = "blake2s-224-x86",
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
- .base.cra_priority = 200,
- .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
-
- .digestsize = BLAKE2S_224_HASH_SIZE,
- .setkey = crypto_blake2s_setkey,
- .init = crypto_blake2s_init,
- .update = crypto_blake2s_update,
- .final = crypto_blake2s_final,
- .descsize = sizeof(struct blake2s_state),
-}, {
- .base.cra_name = "blake2s-256",
- .base.cra_driver_name = "blake2s-256-x86",
- .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
- .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
- .base.cra_priority = 200,
- .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
- .base.cra_module = THIS_MODULE,
-
- .digestsize = BLAKE2S_256_HASH_SIZE,
- .setkey = crypto_blake2s_setkey,
- .init = crypto_blake2s_init,
- .update = crypto_blake2s_update,
- .final = crypto_blake2s_final,
- .descsize = sizeof(struct blake2s_state),
-}};
+static struct shash_alg blake2s_algs[] = {
+ BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE),
+ BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE),
+ BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE),
+ BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE),
+};
static int __init blake2s_mod_init(void)
{
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index cedfdba69ce3..a880e0b1c255 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -6,8 +6,6 @@
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- * CTR part based on code (crypto/ctr.c) by:
- * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*/
#include <crypto/algapi.h>
@@ -247,97 +245,6 @@ static int cbc_decrypt(struct skcipher_request *req)
return err;
}
-static void ctr_crypt_final(struct bf_ctx *ctx, struct skcipher_walk *walk)
-{
- u8 *ctrblk = walk->iv;
- u8 keystream[BF_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
-
- blowfish_enc_blk(ctx, keystream, ctrblk);
- crypto_xor_cpy(dst, keystream, src, nbytes);
-
- crypto_inc(ctrblk, BF_BLOCK_SIZE);
-}
-
-static unsigned int __ctr_crypt(struct bf_ctx *ctx, struct skcipher_walk *walk)
-{
- unsigned int bsize = BF_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u64 *src = (u64 *)walk->src.virt.addr;
- u64 *dst = (u64 *)walk->dst.virt.addr;
- u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
- __be64 ctrblocks[4];
-
- /* Process four block batch */
- if (nbytes >= bsize * 4) {
- do {
- if (dst != src) {
- dst[0] = src[0];
- dst[1] = src[1];
- dst[2] = src[2];
- dst[3] = src[3];
- }
-
- /* create ctrblks for parallel encrypt */
- ctrblocks[0] = cpu_to_be64(ctrblk++);
- ctrblocks[1] = cpu_to_be64(ctrblk++);
- ctrblocks[2] = cpu_to_be64(ctrblk++);
- ctrblocks[3] = cpu_to_be64(ctrblk++);
-
- blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
- (u8 *)ctrblocks);
-
- src += 4;
- dst += 4;
- } while ((nbytes -= bsize * 4) >= bsize * 4);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- if (dst != src)
- *dst = *src;
-
- ctrblocks[0] = cpu_to_be64(ctrblk++);
-
- blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)ctrblocks);
-
- src += 1;
- dst += 1;
- } while ((nbytes -= bsize) >= bsize);
-
-done:
- *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
- return nbytes;
-}
-
-static int ctr_crypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct bf_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
- nbytes = __ctr_crypt(ctx, &walk);
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- if (nbytes) {
- ctr_crypt_final(ctx, &walk);
- err = skcipher_walk_done(&walk, 0);
- }
-
- return err;
-}
-
static struct crypto_alg bf_cipher_alg = {
.cra_name = "blowfish",
.cra_driver_name = "blowfish-asm",
@@ -384,20 +291,6 @@ static struct skcipher_alg bf_skcipher_algs[] = {
.setkey = blowfish_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
- }, {
- .base.cra_name = "ctr(blowfish)",
- .base.cra_driver_name = "ctr-blowfish-asm",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct bf_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = BF_MIN_KEY_SIZE,
- .max_keysize = BF_MAX_KEY_SIZE,
- .ivsize = BF_BLOCK_SIZE,
- .chunksize = BF_BLOCK_SIZE,
- .setkey = blowfish_setkey_skcipher,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
},
};
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index ecc0a9a905c4..e2a0e0f4bf9d 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -17,7 +17,6 @@
#include <linux/linkage.h>
#include <asm/frame.h>
-#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -589,14 +588,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.long 0x80808080
.long 0x80808080
-/* For CTR-mode IV byteswap */
-.Lbswap128_mask:
- .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
-
-/* For XTS mode IV generation */
-.Lxts_gf128mul_and_shl1_mask:
- .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
-
/*
* pre-SubByte transform
*
@@ -998,292 +989,3 @@ SYM_FUNC_START(camellia_cbc_dec_16way)
FRAME_END
ret;
SYM_FUNC_END(camellia_cbc_dec_16way)
-
-#define inc_le128(x, minus_one, tmp) \
- vpcmpeqq minus_one, x, tmp; \
- vpsubq minus_one, x, x; \
- vpslldq $8, tmp, tmp; \
- vpsubq tmp, x, x;
-
-SYM_FUNC_START(camellia_ctr_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (16 blocks)
- * %rdx: src (16 blocks)
- * %rcx: iv (little endian, 128bit)
- */
- FRAME_BEGIN
-
- subq $(16 * 16), %rsp;
- movq %rsp, %rax;
-
- vmovdqa .Lbswap128_mask, %xmm14;
-
- /* load IV and byteswap */
- vmovdqu (%rcx), %xmm0;
- vpshufb %xmm14, %xmm0, %xmm15;
- vmovdqu %xmm15, 15 * 16(%rax);
-
- vpcmpeqd %xmm15, %xmm15, %xmm15;
- vpsrldq $8, %xmm15, %xmm15; /* low: -1, high: 0 */
-
- /* construct IVs */
- inc_le128(%xmm0, %xmm15, %xmm13);
- vpshufb %xmm14, %xmm0, %xmm13;
- vmovdqu %xmm13, 14 * 16(%rax);
- inc_le128(%xmm0, %xmm15, %xmm13);
- vpshufb %xmm14, %xmm0, %xmm13;
- vmovdqu %xmm13, 13 * 16(%rax);
- inc_le128(%xmm0, %xmm15, %xmm13);
- vpshufb %xmm14, %xmm0, %xmm12;
- inc_le128(%xmm0, %xmm15, %xmm13);
- vpshufb %xmm14, %xmm0, %xmm11;
- inc_le128(%xmm0, %xmm15, %xmm13);
- vpshufb %xmm14, %xmm0, %xmm10;
- inc_le128(%xmm0, %xmm15, %xmm13);
- vpshufb %xmm14, %xmm0, %xmm9;
- inc_le128(%xmm0, %xmm15, %xmm13);
- vpshufb %xmm14, %xmm0, %xmm8;
- inc_le128(%xmm0, %xmm15, %xmm13);
- vpshufb %xmm14, %xmm0, %xmm7;
- inc_le128(%xmm0, %xmm15, %xmm13);
- vpshufb %xmm14, %xmm0, %xmm6;
- inc_le128(%xmm0, %xmm15, %xmm13);
- vpshufb %xmm14, %xmm0, %xmm5;
- inc_le128(%xmm0, %xmm15, %xmm13);
- vpshufb %xmm14, %xmm0, %xmm4;
- inc_le128(%xmm0, %xmm15, %xmm13);
- vpshufb %xmm14, %xmm0, %xmm3;
- inc_le128(%xmm0, %xmm15, %xmm13);
- vpshufb %xmm14, %xmm0, %xmm2;
- inc_le128(%xmm0, %xmm15, %xmm13);
- vpshufb %xmm14, %xmm0, %xmm1;
- inc_le128(%xmm0, %xmm15, %xmm13);
- vmovdqa %xmm0, %xmm13;
- vpshufb %xmm14, %xmm0, %xmm0;
- inc_le128(%xmm13, %xmm15, %xmm14);
- vmovdqu %xmm13, (%rcx);
-
- /* inpack16_pre: */
- vmovq (key_table)(CTX), %xmm15;
- vpshufb .Lpack_bswap, %xmm15, %xmm15;
- vpxor %xmm0, %xmm15, %xmm0;
- vpxor %xmm1, %xmm15, %xmm1;
- vpxor %xmm2, %xmm15, %xmm2;
- vpxor %xmm3, %xmm15, %xmm3;
- vpxor %xmm4, %xmm15, %xmm4;
- vpxor %xmm5, %xmm15, %xmm5;
- vpxor %xmm6, %xmm15, %xmm6;
- vpxor %xmm7, %xmm15, %xmm7;
- vpxor %xmm8, %xmm15, %xmm8;
- vpxor %xmm9, %xmm15, %xmm9;
- vpxor %xmm10, %xmm15, %xmm10;
- vpxor %xmm11, %xmm15, %xmm11;
- vpxor %xmm12, %xmm15, %xmm12;
- vpxor 13 * 16(%rax), %xmm15, %xmm13;
- vpxor 14 * 16(%rax), %xmm15, %xmm14;
- vpxor 15 * 16(%rax), %xmm15, %xmm15;
-
- call __camellia_enc_blk16;
-
- addq $(16 * 16), %rsp;
-
- vpxor 0 * 16(%rdx), %xmm7, %xmm7;
- vpxor 1 * 16(%rdx), %xmm6, %xmm6;
- vpxor 2 * 16(%rdx), %xmm5, %xmm5;
- vpxor 3 * 16(%rdx), %xmm4, %xmm4;
- vpxor 4 * 16(%rdx), %xmm3, %xmm3;
- vpxor 5 * 16(%rdx), %xmm2, %xmm2;
- vpxor 6 * 16(%rdx), %xmm1, %xmm1;
- vpxor 7 * 16(%rdx), %xmm0, %xmm0;
- vpxor 8 * 16(%rdx), %xmm15, %xmm15;
- vpxor 9 * 16(%rdx), %xmm14, %xmm14;
- vpxor 10 * 16(%rdx), %xmm13, %xmm13;
- vpxor 11 * 16(%rdx), %xmm12, %xmm12;
- vpxor 12 * 16(%rdx), %xmm11, %xmm11;
- vpxor 13 * 16(%rdx), %xmm10, %xmm10;
- vpxor 14 * 16(%rdx), %xmm9, %xmm9;
- vpxor 15 * 16(%rdx), %xmm8, %xmm8;
- write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
- %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
- %xmm8, %rsi);
-
- FRAME_END
- ret;
-SYM_FUNC_END(camellia_ctr_16way)
-
-#define gf128mul_x_ble(iv, mask, tmp) \
- vpsrad $31, iv, tmp; \
- vpaddq iv, iv, iv; \
- vpshufd $0x13, tmp, tmp; \
- vpand mask, tmp, tmp; \
- vpxor tmp, iv, iv;
-
-.align 8
-SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (16 blocks)
- * %rdx: src (16 blocks)
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- * %r8: index for input whitening key
- * %r9: pointer to __camellia_enc_blk16 or __camellia_dec_blk16
- */
- FRAME_BEGIN
-
- subq $(16 * 16), %rsp;
- movq %rsp, %rax;
-
- vmovdqa .Lxts_gf128mul_and_shl1_mask, %xmm14;
-
- /* load IV */
- vmovdqu (%rcx), %xmm0;
- vpxor 0 * 16(%rdx), %xmm0, %xmm15;
- vmovdqu %xmm15, 15 * 16(%rax);
- vmovdqu %xmm0, 0 * 16(%rsi);
-
- /* construct IVs */
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 1 * 16(%rdx), %xmm0, %xmm15;
- vmovdqu %xmm15, 14 * 16(%rax);
- vmovdqu %xmm0, 1 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 2 * 16(%rdx), %xmm0, %xmm13;
- vmovdqu %xmm0, 2 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 3 * 16(%rdx), %xmm0, %xmm12;
- vmovdqu %xmm0, 3 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 4 * 16(%rdx), %xmm0, %xmm11;
- vmovdqu %xmm0, 4 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 5 * 16(%rdx), %xmm0, %xmm10;
- vmovdqu %xmm0, 5 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 6 * 16(%rdx), %xmm0, %xmm9;
- vmovdqu %xmm0, 6 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 7 * 16(%rdx), %xmm0, %xmm8;
- vmovdqu %xmm0, 7 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 8 * 16(%rdx), %xmm0, %xmm7;
- vmovdqu %xmm0, 8 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 9 * 16(%rdx), %xmm0, %xmm6;
- vmovdqu %xmm0, 9 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 10 * 16(%rdx), %xmm0, %xmm5;
- vmovdqu %xmm0, 10 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 11 * 16(%rdx), %xmm0, %xmm4;
- vmovdqu %xmm0, 11 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 12 * 16(%rdx), %xmm0, %xmm3;
- vmovdqu %xmm0, 12 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 13 * 16(%rdx), %xmm0, %xmm2;
- vmovdqu %xmm0, 13 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 14 * 16(%rdx), %xmm0, %xmm1;
- vmovdqu %xmm0, 14 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vpxor 15 * 16(%rdx), %xmm0, %xmm15;
- vmovdqu %xmm15, 0 * 16(%rax);
- vmovdqu %xmm0, 15 * 16(%rsi);
-
- gf128mul_x_ble(%xmm0, %xmm14, %xmm15);
- vmovdqu %xmm0, (%rcx);
-
- /* inpack16_pre: */
- vmovq (key_table)(CTX, %r8, 8), %xmm15;
- vpshufb .Lpack_bswap, %xmm15, %xmm15;
- vpxor 0 * 16(%rax), %xmm15, %xmm0;
- vpxor %xmm1, %xmm15, %xmm1;
- vpxor %xmm2, %xmm15, %xmm2;
- vpxor %xmm3, %xmm15, %xmm3;
- vpxor %xmm4, %xmm15, %xmm4;
- vpxor %xmm5, %xmm15, %xmm5;
- vpxor %xmm6, %xmm15, %xmm6;
- vpxor %xmm7, %xmm15, %xmm7;
- vpxor %xmm8, %xmm15, %xmm8;
- vpxor %xmm9, %xmm15, %xmm9;
- vpxor %xmm10, %xmm15, %xmm10;
- vpxor %xmm11, %xmm15, %xmm11;
- vpxor %xmm12, %xmm15, %xmm12;
- vpxor %xmm13, %xmm15, %xmm13;
- vpxor 14 * 16(%rax), %xmm15, %xmm14;
- vpxor 15 * 16(%rax), %xmm15, %xmm15;
-
- CALL_NOSPEC r9;
-
- addq $(16 * 16), %rsp;
-
- vpxor 0 * 16(%rsi), %xmm7, %xmm7;
- vpxor 1 * 16(%rsi), %xmm6, %xmm6;
- vpxor 2 * 16(%rsi), %xmm5, %xmm5;
- vpxor 3 * 16(%rsi), %xmm4, %xmm4;
- vpxor 4 * 16(%rsi), %xmm3, %xmm3;
- vpxor 5 * 16(%rsi), %xmm2, %xmm2;
- vpxor 6 * 16(%rsi), %xmm1, %xmm1;
- vpxor 7 * 16(%rsi), %xmm0, %xmm0;
- vpxor 8 * 16(%rsi), %xmm15, %xmm15;
- vpxor 9 * 16(%rsi), %xmm14, %xmm14;
- vpxor 10 * 16(%rsi), %xmm13, %xmm13;
- vpxor 11 * 16(%rsi), %xmm12, %xmm12;
- vpxor 12 * 16(%rsi), %xmm11, %xmm11;
- vpxor 13 * 16(%rsi), %xmm10, %xmm10;
- vpxor 14 * 16(%rsi), %xmm9, %xmm9;
- vpxor 15 * 16(%rsi), %xmm8, %xmm8;
- write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
- %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
- %xmm8, %rsi);
-
- FRAME_END
- ret;
-SYM_FUNC_END(camellia_xts_crypt_16way)
-
-SYM_FUNC_START(camellia_xts_enc_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (16 blocks)
- * %rdx: src (16 blocks)
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
- xorl %r8d, %r8d; /* input whitening key, 0 for enc */
-
- leaq __camellia_enc_blk16, %r9;
-
- jmp camellia_xts_crypt_16way;
-SYM_FUNC_END(camellia_xts_enc_16way)
-
-SYM_FUNC_START(camellia_xts_dec_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (16 blocks)
- * %rdx: src (16 blocks)
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
-
- cmpl $16, key_length(CTX);
- movl $32, %r8d;
- movl $24, %eax;
- cmovel %eax, %r8d; /* input whitening key, last for dec */
-
- leaq __camellia_dec_blk16, %r9;
-
- jmp camellia_xts_crypt_16way;
-SYM_FUNC_END(camellia_xts_dec_16way)
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 0907243c501c..782e9712a1ec 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -7,7 +7,6 @@
#include <linux/linkage.h>
#include <asm/frame.h>
-#include <asm/nospec-branch.h>
#define CAMELLIA_TABLE_BYTE_LEN 272
@@ -625,16 +624,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.section .rodata.cst16, "aM", @progbits, 16
.align 16
-/* For CTR-mode IV byteswap */
-.Lbswap128_mask:
- .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
-
-/* For XTS mode */
-.Lxts_gf128mul_and_shl1_mask_0:
- .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
-.Lxts_gf128mul_and_shl1_mask_1:
- .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
-
/*
* pre-SubByte transform
*
@@ -1061,343 +1050,3 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
FRAME_END
ret;
SYM_FUNC_END(camellia_cbc_dec_32way)
-
-#define inc_le128(x, minus_one, tmp) \
- vpcmpeqq minus_one, x, tmp; \
- vpsubq minus_one, x, x; \
- vpslldq $8, tmp, tmp; \
- vpsubq tmp, x, x;
-
-#define add2_le128(x, minus_one, minus_two, tmp1, tmp2) \
- vpcmpeqq minus_one, x, tmp1; \
- vpcmpeqq minus_two, x, tmp2; \
- vpsubq minus_two, x, x; \
- vpor tmp2, tmp1, tmp1; \
- vpslldq $8, tmp1, tmp1; \
- vpsubq tmp1, x, x;
-
-SYM_FUNC_START(camellia_ctr_32way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (32 blocks)
- * %rdx: src (32 blocks)
- * %rcx: iv (little endian, 128bit)
- */
- FRAME_BEGIN
-
- vzeroupper;
-
- movq %rsp, %r10;
- cmpq %rsi, %rdx;
- je .Lctr_use_stack;
-
- /* dst can be used as temporary storage, src is not overwritten. */
- movq %rsi, %rax;
- jmp .Lctr_continue;
-
-.Lctr_use_stack:
- subq $(16 * 32), %rsp;
- movq %rsp, %rax;
-
-.Lctr_continue:
- vpcmpeqd %ymm15, %ymm15, %ymm15;
- vpsrldq $8, %ymm15, %ymm15; /* ab: -1:0 ; cd: -1:0 */
- vpaddq %ymm15, %ymm15, %ymm12; /* ab: -2:0 ; cd: -2:0 */
-
- /* load IV and byteswap */
- vmovdqu (%rcx), %xmm0;
- vmovdqa %xmm0, %xmm1;
- inc_le128(%xmm0, %xmm15, %xmm14);
- vbroadcasti128 .Lbswap128_mask, %ymm14;
- vinserti128 $1, %xmm0, %ymm1, %ymm0;
- vpshufb %ymm14, %ymm0, %ymm13;
- vmovdqu %ymm13, 15 * 32(%rax);
-
- /* construct IVs */
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); /* ab:le2 ; cd:le3 */
- vpshufb %ymm14, %ymm0, %ymm13;
- vmovdqu %ymm13, 14 * 32(%rax);
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm13;
- vmovdqu %ymm13, 13 * 32(%rax);
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm13;
- vmovdqu %ymm13, 12 * 32(%rax);
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm13;
- vmovdqu %ymm13, 11 * 32(%rax);
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm10;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm9;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm8;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm7;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm6;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm5;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm4;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm3;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm2;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm1;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vextracti128 $1, %ymm0, %xmm13;
- vpshufb %ymm14, %ymm0, %ymm0;
- inc_le128(%xmm13, %xmm15, %xmm14);
- vmovdqu %xmm13, (%rcx);
-
- /* inpack32_pre: */
- vpbroadcastq (key_table)(CTX), %ymm15;
- vpshufb .Lpack_bswap, %ymm15, %ymm15;
- vpxor %ymm0, %ymm15, %ymm0;
- vpxor %ymm1, %ymm15, %ymm1;
- vpxor %ymm2, %ymm15, %ymm2;
- vpxor %ymm3, %ymm15, %ymm3;
- vpxor %ymm4, %ymm15, %ymm4;
- vpxor %ymm5, %ymm15, %ymm5;
- vpxor %ymm6, %ymm15, %ymm6;
- vpxor %ymm7, %ymm15, %ymm7;
- vpxor %ymm8, %ymm15, %ymm8;
- vpxor %ymm9, %ymm15, %ymm9;
- vpxor %ymm10, %ymm15, %ymm10;
- vpxor 11 * 32(%rax), %ymm15, %ymm11;
- vpxor 12 * 32(%rax), %ymm15, %ymm12;
- vpxor 13 * 32(%rax), %ymm15, %ymm13;
- vpxor 14 * 32(%rax), %ymm15, %ymm14;
- vpxor 15 * 32(%rax), %ymm15, %ymm15;
-
- call __camellia_enc_blk32;
-
- movq %r10, %rsp;
-
- vpxor 0 * 32(%rdx), %ymm7, %ymm7;
- vpxor 1 * 32(%rdx), %ymm6, %ymm6;
- vpxor 2 * 32(%rdx), %ymm5, %ymm5;
- vpxor 3 * 32(%rdx), %ymm4, %ymm4;
- vpxor 4 * 32(%rdx), %ymm3, %ymm3;
- vpxor 5 * 32(%rdx), %ymm2, %ymm2;
- vpxor 6 * 32(%rdx), %ymm1, %ymm1;
- vpxor 7 * 32(%rdx), %ymm0, %ymm0;
- vpxor 8 * 32(%rdx), %ymm15, %ymm15;
- vpxor 9 * 32(%rdx), %ymm14, %ymm14;
- vpxor 10 * 32(%rdx), %ymm13, %ymm13;
- vpxor 11 * 32(%rdx), %ymm12, %ymm12;
- vpxor 12 * 32(%rdx), %ymm11, %ymm11;
- vpxor 13 * 32(%rdx), %ymm10, %ymm10;
- vpxor 14 * 32(%rdx), %ymm9, %ymm9;
- vpxor 15 * 32(%rdx), %ymm8, %ymm8;
- write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
- %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
- %ymm8, %rsi);
-
- vzeroupper;
-
- FRAME_END
- ret;
-SYM_FUNC_END(camellia_ctr_32way)
-
-#define gf128mul_x_ble(iv, mask, tmp) \
- vpsrad $31, iv, tmp; \
- vpaddq iv, iv, iv; \
- vpshufd $0x13, tmp, tmp; \
- vpand mask, tmp, tmp; \
- vpxor tmp, iv, iv;
-
-#define gf128mul_x2_ble(iv, mask1, mask2, tmp0, tmp1) \
- vpsrad $31, iv, tmp0; \
- vpaddq iv, iv, tmp1; \
- vpsllq $2, iv, iv; \
- vpshufd $0x13, tmp0, tmp0; \
- vpsrad $31, tmp1, tmp1; \
- vpand mask2, tmp0, tmp0; \
- vpshufd $0x13, tmp1, tmp1; \
- vpxor tmp0, iv, iv; \
- vpand mask1, tmp1, tmp1; \
- vpxor tmp1, iv, iv;
-
-.align 8
-SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (32 blocks)
- * %rdx: src (32 blocks)
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- * %r8: index for input whitening key
- * %r9: pointer to __camellia_enc_blk32 or __camellia_dec_blk32
- */
- FRAME_BEGIN
-
- vzeroupper;
-
- subq $(16 * 32), %rsp;
- movq %rsp, %rax;
-
- vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_0, %ymm12;
-
- /* load IV and construct second IV */
- vmovdqu (%rcx), %xmm0;
- vmovdqa %xmm0, %xmm15;
- gf128mul_x_ble(%xmm0, %xmm12, %xmm13);
- vbroadcasti128 .Lxts_gf128mul_and_shl1_mask_1, %ymm13;
- vinserti128 $1, %xmm0, %ymm15, %ymm0;
- vpxor 0 * 32(%rdx), %ymm0, %ymm15;
- vmovdqu %ymm15, 15 * 32(%rax);
- vmovdqu %ymm0, 0 * 32(%rsi);
-
- /* construct IVs */
- gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
- vpxor 1 * 32(%rdx), %ymm0, %ymm15;
- vmovdqu %ymm15, 14 * 32(%rax);
- vmovdqu %ymm0, 1 * 32(%rsi);
-
- gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
- vpxor 2 * 32(%rdx), %ymm0, %ymm15;
- vmovdqu %ymm15, 13 * 32(%rax);
- vmovdqu %ymm0, 2 * 32(%rsi);
-
- gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
- vpxor 3 * 32(%rdx), %ymm0, %ymm15;
- vmovdqu %ymm15, 12 * 32(%rax);
- vmovdqu %ymm0, 3 * 32(%rsi);
-
- gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
- vpxor 4 * 32(%rdx), %ymm0, %ymm11;
- vmovdqu %ymm0, 4 * 32(%rsi);
-
- gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
- vpxor 5 * 32(%rdx), %ymm0, %ymm10;
- vmovdqu %ymm0, 5 * 32(%rsi);
-
- gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
- vpxor 6 * 32(%rdx), %ymm0, %ymm9;
- vmovdqu %ymm0, 6 * 32(%rsi);
-
- gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
- vpxor 7 * 32(%rdx), %ymm0, %ymm8;
- vmovdqu %ymm0, 7 * 32(%rsi);
-
- gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
- vpxor 8 * 32(%rdx), %ymm0, %ymm7;
- vmovdqu %ymm0, 8 * 32(%rsi);
-
- gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
- vpxor 9 * 32(%rdx), %ymm0, %ymm6;
- vmovdqu %ymm0, 9 * 32(%rsi);
-
- gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
- vpxor 10 * 32(%rdx), %ymm0, %ymm5;
- vmovdqu %ymm0, 10 * 32(%rsi);
-
- gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
- vpxor 11 * 32(%rdx), %ymm0, %ymm4;
- vmovdqu %ymm0, 11 * 32(%rsi);
-
- gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
- vpxor 12 * 32(%rdx), %ymm0, %ymm3;
- vmovdqu %ymm0, 12 * 32(%rsi);
-
- gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
- vpxor 13 * 32(%rdx), %ymm0, %ymm2;
- vmovdqu %ymm0, 13 * 32(%rsi);
-
- gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
- vpxor 14 * 32(%rdx), %ymm0, %ymm1;
- vmovdqu %ymm0, 14 * 32(%rsi);
-
- gf128mul_x2_ble(%ymm0, %ymm12, %ymm13, %ymm14, %ymm15);
- vpxor 15 * 32(%rdx), %ymm0, %ymm15;
- vmovdqu %ymm15, 0 * 32(%rax);
- vmovdqu %ymm0, 15 * 32(%rsi);
-
- vextracti128 $1, %ymm0, %xmm0;
- gf128mul_x_ble(%xmm0, %xmm12, %xmm15);
- vmovdqu %xmm0, (%rcx);
-
- /* inpack32_pre: */
- vpbroadcastq (key_table)(CTX, %r8, 8), %ymm15;
- vpshufb .Lpack_bswap, %ymm15, %ymm15;
- vpxor 0 * 32(%rax), %ymm15, %ymm0;
- vpxor %ymm1, %ymm15, %ymm1;
- vpxor %ymm2, %ymm15, %ymm2;
- vpxor %ymm3, %ymm15, %ymm3;
- vpxor %ymm4, %ymm15, %ymm4;
- vpxor %ymm5, %ymm15, %ymm5;
- vpxor %ymm6, %ymm15, %ymm6;
- vpxor %ymm7, %ymm15, %ymm7;
- vpxor %ymm8, %ymm15, %ymm8;
- vpxor %ymm9, %ymm15, %ymm9;
- vpxor %ymm10, %ymm15, %ymm10;
- vpxor %ymm11, %ymm15, %ymm11;
- vpxor 12 * 32(%rax), %ymm15, %ymm12;
- vpxor 13 * 32(%rax), %ymm15, %ymm13;
- vpxor 14 * 32(%rax), %ymm15, %ymm14;
- vpxor 15 * 32(%rax), %ymm15, %ymm15;
-
- CALL_NOSPEC r9;
-
- addq $(16 * 32), %rsp;
-
- vpxor 0 * 32(%rsi), %ymm7, %ymm7;
- vpxor 1 * 32(%rsi), %ymm6, %ymm6;
- vpxor 2 * 32(%rsi), %ymm5, %ymm5;
- vpxor 3 * 32(%rsi), %ymm4, %ymm4;
- vpxor 4 * 32(%rsi), %ymm3, %ymm3;
- vpxor 5 * 32(%rsi), %ymm2, %ymm2;
- vpxor 6 * 32(%rsi), %ymm1, %ymm1;
- vpxor 7 * 32(%rsi), %ymm0, %ymm0;
- vpxor 8 * 32(%rsi), %ymm15, %ymm15;
- vpxor 9 * 32(%rsi), %ymm14, %ymm14;
- vpxor 10 * 32(%rsi), %ymm13, %ymm13;
- vpxor 11 * 32(%rsi), %ymm12, %ymm12;
- vpxor 12 * 32(%rsi), %ymm11, %ymm11;
- vpxor 13 * 32(%rsi), %ymm10, %ymm10;
- vpxor 14 * 32(%rsi), %ymm9, %ymm9;
- vpxor 15 * 32(%rsi), %ymm8, %ymm8;
- write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
- %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
- %ymm8, %rsi);
-
- vzeroupper;
-
- FRAME_END
- ret;
-SYM_FUNC_END(camellia_xts_crypt_32way)
-
-SYM_FUNC_START(camellia_xts_enc_32way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (32 blocks)
- * %rdx: src (32 blocks)
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
-
- xorl %r8d, %r8d; /* input whitening key, 0 for enc */
-
- leaq __camellia_enc_blk32, %r9;
-
- jmp camellia_xts_crypt_32way;
-SYM_FUNC_END(camellia_xts_enc_32way)
-
-SYM_FUNC_START(camellia_xts_dec_32way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (32 blocks)
- * %rdx: src (32 blocks)
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
-
- cmpl $16, key_length(CTX);
- movl $32, %r8d;
- movl $24, %eax;
- cmovel %eax, %r8d; /* input whitening key, last for dec */
-
- leaq __camellia_dec_blk32, %r9;
-
- jmp camellia_xts_crypt_32way;
-SYM_FUNC_END(camellia_xts_dec_32way)
diff --git a/arch/x86/crypto/camellia.h b/arch/x86/crypto/camellia.h
new file mode 100644
index 000000000000..1dcea79e8f8e
--- /dev/null
+++ b/arch/x86/crypto/camellia.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_X86_CAMELLIA_H
+#define ASM_X86_CAMELLIA_H
+
+#include <crypto/b128ops.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+
+#define CAMELLIA_MIN_KEY_SIZE 16
+#define CAMELLIA_MAX_KEY_SIZE 32
+#define CAMELLIA_BLOCK_SIZE 16
+#define CAMELLIA_TABLE_BYTE_LEN 272
+#define CAMELLIA_PARALLEL_BLOCKS 2
+
+struct crypto_skcipher;
+
+struct camellia_ctx {
+ u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
+ u32 key_length;
+};
+
+extern int __camellia_setkey(struct camellia_ctx *cctx,
+ const unsigned char *key,
+ unsigned int key_len);
+
+/* regular block cipher functions */
+asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src,
+ bool xor);
+asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src);
+
+/* 2-way parallel cipher functions */
+asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src,
+ bool xor);
+asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src);
+
+/* 16-way parallel cipher functions (avx/aes-ni) */
+asmlinkage void camellia_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void camellia_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+
+asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
+
+static inline void camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src)
+{
+ __camellia_enc_blk(ctx, dst, src, false);
+}
+
+static inline void camellia_enc_blk_xor(const void *ctx, u8 *dst, const u8 *src)
+{
+ __camellia_enc_blk(ctx, dst, src, true);
+}
+
+static inline void camellia_enc_blk_2way(const void *ctx, u8 *dst,
+ const u8 *src)
+{
+ __camellia_enc_blk_2way(ctx, dst, src, false);
+}
+
+static inline void camellia_enc_blk_xor_2way(const void *ctx, u8 *dst,
+ const u8 *src)
+{
+ __camellia_enc_blk_2way(ctx, dst, src, true);
+}
+
+/* glue helpers */
+extern void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src);
+
+#endif /* ASM_X86_CAMELLIA_H */
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index ccda647422d6..e7e4d64e9577 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -5,16 +5,16 @@
* Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*/
-#include <asm/crypto/camellia.h>
-#include <asm/crypto/glue_helper.h>
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
-#include <crypto/xts.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>
+#include "camellia.h"
+#include "ecb_cbc_helpers.h"
+
#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
#define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS 32
@@ -23,121 +23,6 @@ asmlinkage void camellia_ecb_enc_32way(const void *ctx, u8 *dst, const u8 *src);
asmlinkage void camellia_ecb_dec_32way(const void *ctx, u8 *dst, const u8 *src);
asmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src);
-asmlinkage void camellia_ctr_32way(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
-
-asmlinkage void camellia_xts_enc_32way(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
-asmlinkage void camellia_xts_dec_32way(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
-
-static const struct common_glue_ctx camellia_enc = {
- .num_funcs = 4,
- .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
- .fn_u = { .ecb = camellia_ecb_enc_32way }
- }, {
- .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
- .fn_u = { .ecb = camellia_ecb_enc_16way }
- }, {
- .num_blocks = 2,
- .fn_u = { .ecb = camellia_enc_blk_2way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = camellia_enc_blk }
- } }
-};
-
-static const struct common_glue_ctx camellia_ctr = {
- .num_funcs = 4,
- .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
- .fn_u = { .ctr = camellia_ctr_32way }
- }, {
- .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
- .fn_u = { .ctr = camellia_ctr_16way }
- }, {
- .num_blocks = 2,
- .fn_u = { .ctr = camellia_crypt_ctr_2way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ctr = camellia_crypt_ctr }
- } }
-};
-
-static const struct common_glue_ctx camellia_enc_xts = {
- .num_funcs = 3,
- .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
- .fn_u = { .xts = camellia_xts_enc_32way }
- }, {
- .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
- .fn_u = { .xts = camellia_xts_enc_16way }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = camellia_xts_enc }
- } }
-};
-
-static const struct common_glue_ctx camellia_dec = {
- .num_funcs = 4,
- .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
- .fn_u = { .ecb = camellia_ecb_dec_32way }
- }, {
- .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
- .fn_u = { .ecb = camellia_ecb_dec_16way }
- }, {
- .num_blocks = 2,
- .fn_u = { .ecb = camellia_dec_blk_2way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = camellia_dec_blk }
- } }
-};
-
-static const struct common_glue_ctx camellia_dec_cbc = {
- .num_funcs = 4,
- .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
- .fn_u = { .cbc = camellia_cbc_dec_32way }
- }, {
- .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
- .fn_u = { .cbc = camellia_cbc_dec_16way }
- }, {
- .num_blocks = 2,
- .fn_u = { .cbc = camellia_decrypt_cbc_2way }
- }, {
- .num_blocks = 1,
- .fn_u = { .cbc = camellia_dec_blk }
- } }
-};
-
-static const struct common_glue_ctx camellia_dec_xts = {
- .num_funcs = 3,
- .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
- .fn_u = { .xts = camellia_xts_dec_32way }
- }, {
- .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
- .fn_u = { .xts = camellia_xts_dec_16way }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = camellia_xts_dec }
- } }
-};
static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
@@ -147,45 +32,39 @@ static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&camellia_enc, req);
+ ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS);
+ ECB_BLOCK(CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, camellia_ecb_enc_32way);
+ ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_enc_16way);
+ ECB_BLOCK(2, camellia_enc_blk_2way);
+ ECB_BLOCK(1, camellia_enc_blk);
+ ECB_WALK_END();
}
static int ecb_decrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&camellia_dec, req);
+ ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS);
+ ECB_BLOCK(CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, camellia_ecb_dec_32way);
+ ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_dec_16way);
+ ECB_BLOCK(2, camellia_dec_blk_2way);
+ ECB_BLOCK(1, camellia_dec_blk);
+ ECB_WALK_END();
}
static int cbc_encrypt(struct skcipher_request *req)
{
- return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
+ CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1);
+ CBC_ENC_BLOCK(camellia_enc_blk);
+ CBC_WALK_END();
}
static int cbc_decrypt(struct skcipher_request *req)
{
- return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
-}
-
-static int ctr_crypt(struct skcipher_request *req)
-{
- return glue_ctr_req_128bit(&camellia_ctr, req);
-}
-
-static int xts_encrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk,
- &ctx->tweak_ctx, &ctx->crypt_ctx, false);
-}
-
-static int xts_decrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk,
- &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+ CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS);
+ CBC_DEC_BLOCK(CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS, camellia_cbc_dec_32way);
+ CBC_DEC_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_cbc_dec_16way);
+ CBC_DEC_BLOCK(2, camellia_decrypt_cbc_2way);
+ CBC_DEC_BLOCK(1, camellia_dec_blk);
+ CBC_WALK_END();
}
static struct skcipher_alg camellia_algs[] = {
@@ -216,35 +95,6 @@ static struct skcipher_alg camellia_algs[] = {
.setkey = camellia_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
- }, {
- .base.cra_name = "__ctr(camellia)",
- .base.cra_driver_name = "__ctr-camellia-aesni-avx2",
- .base.cra_priority = 500,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct camellia_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .chunksize = CAMELLIA_BLOCK_SIZE,
- .setkey = camellia_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- }, {
- .base.cra_name = "__xts(camellia)",
- .base.cra_driver_name = "__xts-camellia-aesni-avx2",
- .base.cra_priority = 500,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
- .base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct camellia_xts_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = xts_camellia_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
},
};
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 4e5de6ef206e..c7ccf63e741e 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -5,16 +5,16 @@
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*/
-#include <asm/crypto/camellia.h>
-#include <asm/crypto/glue_helper.h>
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
-#include <crypto/xts.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>
+#include "camellia.h"
+#include "ecb_cbc_helpers.h"
+
#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
/* 16-way parallel cipher functions (avx/aes-ni) */
@@ -27,120 +27,6 @@ EXPORT_SYMBOL_GPL(camellia_ecb_dec_16way);
asmlinkage void camellia_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
EXPORT_SYMBOL_GPL(camellia_cbc_dec_16way);
-asmlinkage void camellia_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
-EXPORT_SYMBOL_GPL(camellia_ctr_16way);
-
-asmlinkage void camellia_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
-EXPORT_SYMBOL_GPL(camellia_xts_enc_16way);
-
-asmlinkage void camellia_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
-EXPORT_SYMBOL_GPL(camellia_xts_dec_16way);
-
-void camellia_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
-{
- glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_enc_blk);
-}
-EXPORT_SYMBOL_GPL(camellia_xts_enc);
-
-void camellia_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
-{
- glue_xts_crypt_128bit_one(ctx, dst, src, iv, camellia_dec_blk);
-}
-EXPORT_SYMBOL_GPL(camellia_xts_dec);
-
-static const struct common_glue_ctx camellia_enc = {
- .num_funcs = 3,
- .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
- .fn_u = { .ecb = camellia_ecb_enc_16way }
- }, {
- .num_blocks = 2,
- .fn_u = { .ecb = camellia_enc_blk_2way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = camellia_enc_blk }
- } }
-};
-
-static const struct common_glue_ctx camellia_ctr = {
- .num_funcs = 3,
- .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
- .fn_u = { .ctr = camellia_ctr_16way }
- }, {
- .num_blocks = 2,
- .fn_u = { .ctr = camellia_crypt_ctr_2way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ctr = camellia_crypt_ctr }
- } }
-};
-
-static const struct common_glue_ctx camellia_enc_xts = {
- .num_funcs = 2,
- .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
- .fn_u = { .xts = camellia_xts_enc_16way }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = camellia_xts_enc }
- } }
-};
-
-static const struct common_glue_ctx camellia_dec = {
- .num_funcs = 3,
- .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
- .fn_u = { .ecb = camellia_ecb_dec_16way }
- }, {
- .num_blocks = 2,
- .fn_u = { .ecb = camellia_dec_blk_2way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = camellia_dec_blk }
- } }
-};
-
-static const struct common_glue_ctx camellia_dec_cbc = {
- .num_funcs = 3,
- .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
- .fn_u = { .cbc = camellia_cbc_dec_16way }
- }, {
- .num_blocks = 2,
- .fn_u = { .cbc = camellia_decrypt_cbc_2way }
- }, {
- .num_blocks = 1,
- .fn_u = { .cbc = camellia_dec_blk }
- } }
-};
-
-static const struct common_glue_ctx camellia_dec_xts = {
- .num_funcs = 2,
- .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
- .fn_u = { .xts = camellia_xts_dec_16way }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = camellia_xts_dec }
- } }
-};
-
static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
{
@@ -149,65 +35,36 @@ static int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&camellia_enc, req);
+ ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS);
+ ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_enc_16way);
+ ECB_BLOCK(2, camellia_enc_blk_2way);
+ ECB_BLOCK(1, camellia_enc_blk);
+ ECB_WALK_END();
}
static int ecb_decrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&camellia_dec, req);
+ ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS);
+ ECB_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_ecb_dec_16way);
+ ECB_BLOCK(2, camellia_dec_blk_2way);
+ ECB_BLOCK(1, camellia_dec_blk);
+ ECB_WALK_END();
}
static int cbc_encrypt(struct skcipher_request *req)
{
- return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
+ CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1);
+ CBC_ENC_BLOCK(camellia_enc_blk);
+ CBC_WALK_END();
}
static int cbc_decrypt(struct skcipher_request *req)
{
- return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
-}
-
-static int ctr_crypt(struct skcipher_request *req)
-{
- return glue_ctr_req_128bit(&camellia_ctr, req);
-}
-
-int xts_camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- int err;
-
- err = xts_verify_key(tfm, key, keylen);
- if (err)
- return err;
-
- /* first half of xts-key is for crypt */
- err = __camellia_setkey(&ctx->crypt_ctx, key, keylen / 2);
- if (err)
- return err;
-
- /* second half of xts-key is for tweak */
- return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
-}
-EXPORT_SYMBOL_GPL(xts_camellia_setkey);
-
-static int xts_encrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk,
- &ctx->tweak_ctx, &ctx->crypt_ctx, false);
-}
-
-static int xts_decrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk,
- &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+ CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, CAMELLIA_AESNI_PARALLEL_BLOCKS);
+ CBC_DEC_BLOCK(CAMELLIA_AESNI_PARALLEL_BLOCKS, camellia_cbc_dec_16way);
+ CBC_DEC_BLOCK(2, camellia_decrypt_cbc_2way);
+ CBC_DEC_BLOCK(1, camellia_dec_blk);
+ CBC_WALK_END();
}
static struct skcipher_alg camellia_algs[] = {
@@ -238,36 +95,7 @@ static struct skcipher_alg camellia_algs[] = {
.setkey = camellia_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
- }, {
- .base.cra_name = "__ctr(camellia)",
- .base.cra_driver_name = "__ctr-camellia-aesni",
- .base.cra_priority = 400,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct camellia_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .chunksize = CAMELLIA_BLOCK_SIZE,
- .setkey = camellia_setkey,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- }, {
- .base.cra_name = "__xts(camellia)",
- .base.cra_driver_name = "__xts-camellia-aesni",
- .base.cra_priority = 400,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
- .base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct camellia_xts_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = 2 * CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = 2 * CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = xts_camellia_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
- },
+ }
};
static struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index 242c056e5fa8..66c435ba9d3d 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -14,8 +14,9 @@
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/algapi.h>
-#include <asm/crypto/camellia.h>
-#include <asm/crypto/glue_helper.h>
+
+#include "camellia.h"
+#include "ecb_cbc_helpers.h"
/* regular block cipher functions */
asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src,
@@ -1262,129 +1263,47 @@ static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
return camellia_setkey(&tfm->base, key, key_len);
}
-void camellia_decrypt_cbc_2way(const void *ctx, u8 *d, const u8 *s)
+void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src)
{
- u128 *dst = (u128 *)d;
- const u128 *src = (const u128 *)s;
- u128 iv = *src;
-
- camellia_dec_blk_2way(ctx, (u8 *)dst, (u8 *)src);
+ u8 buf[CAMELLIA_BLOCK_SIZE];
+ const u8 *iv = src;
- u128_xor(&dst[1], &dst[1], &iv);
+ if (dst == src)
+ iv = memcpy(buf, iv, sizeof(buf));
+ camellia_dec_blk_2way(ctx, dst, src);
+ crypto_xor(dst + CAMELLIA_BLOCK_SIZE, iv, CAMELLIA_BLOCK_SIZE);
}
EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way);
-void camellia_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
-{
- be128 ctrblk;
- u128 *dst = (u128 *)d;
- const u128 *src = (const u128 *)s;
-
- if (dst != src)
- *dst = *src;
-
- le128_to_be128(&ctrblk, iv);
- le128_inc(iv);
-
- camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
-}
-EXPORT_SYMBOL_GPL(camellia_crypt_ctr);
-
-void camellia_crypt_ctr_2way(const void *ctx, u8 *d, const u8 *s, le128 *iv)
-{
- be128 ctrblks[2];
- u128 *dst = (u128 *)d;
- const u128 *src = (const u128 *)s;
-
- if (dst != src) {
- dst[0] = src[0];
- dst[1] = src[1];
- }
-
- le128_to_be128(&ctrblks[0], iv);
- le128_inc(iv);
- le128_to_be128(&ctrblks[1], iv);
- le128_inc(iv);
-
- camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks);
-}
-EXPORT_SYMBOL_GPL(camellia_crypt_ctr_2way);
-
-static const struct common_glue_ctx camellia_enc = {
- .num_funcs = 2,
- .fpu_blocks_limit = -1,
-
- .funcs = { {
- .num_blocks = 2,
- .fn_u = { .ecb = camellia_enc_blk_2way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = camellia_enc_blk }
- } }
-};
-
-static const struct common_glue_ctx camellia_ctr = {
- .num_funcs = 2,
- .fpu_blocks_limit = -1,
-
- .funcs = { {
- .num_blocks = 2,
- .fn_u = { .ctr = camellia_crypt_ctr_2way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ctr = camellia_crypt_ctr }
- } }
-};
-
-static const struct common_glue_ctx camellia_dec = {
- .num_funcs = 2,
- .fpu_blocks_limit = -1,
-
- .funcs = { {
- .num_blocks = 2,
- .fn_u = { .ecb = camellia_dec_blk_2way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = camellia_dec_blk }
- } }
-};
-
-static const struct common_glue_ctx camellia_dec_cbc = {
- .num_funcs = 2,
- .fpu_blocks_limit = -1,
-
- .funcs = { {
- .num_blocks = 2,
- .fn_u = { .cbc = camellia_decrypt_cbc_2way }
- }, {
- .num_blocks = 1,
- .fn_u = { .cbc = camellia_dec_blk }
- } }
-};
-
static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&camellia_enc, req);
+ ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1);
+ ECB_BLOCK(2, camellia_enc_blk_2way);
+ ECB_BLOCK(1, camellia_enc_blk);
+ ECB_WALK_END();
}
static int ecb_decrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&camellia_dec, req);
+ ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1);
+ ECB_BLOCK(2, camellia_dec_blk_2way);
+ ECB_BLOCK(1, camellia_dec_blk);
+ ECB_WALK_END();
}
static int cbc_encrypt(struct skcipher_request *req)
{
- return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
+ CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1);
+ CBC_ENC_BLOCK(camellia_enc_blk);
+ CBC_WALK_END();
}
static int cbc_decrypt(struct skcipher_request *req)
{
- return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
-}
-
-static int ctr_crypt(struct skcipher_request *req)
-{
- return glue_ctr_req_128bit(&camellia_ctr, req);
+ CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1);
+ CBC_DEC_BLOCK(2, camellia_decrypt_cbc_2way);
+ CBC_DEC_BLOCK(1, camellia_dec_blk);
+ CBC_WALK_END();
}
static struct crypto_alg camellia_cipher_alg = {
@@ -1433,20 +1352,6 @@ static struct skcipher_alg camellia_skcipher_algs[] = {
.setkey = camellia_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
- }, {
- .base.cra_name = "ctr(camellia)",
- .base.cra_driver_name = "ctr-camellia-asm",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct camellia_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .chunksize = CAMELLIA_BLOCK_SIZE,
- .setkey = camellia_setkey_skcipher,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
}
};
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 384ccb00f9e1..3976a87f92ad 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -6,7 +6,6 @@
* <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
*/
-#include <asm/crypto/glue_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/internal/simd.h>
@@ -15,6 +14,8 @@
#include <linux/module.h>
#include <linux/types.h>
+#include "ecb_cbc_helpers.h"
+
#define CAST5_PARALLEL_BLOCKS 16
asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
@@ -23,8 +24,6 @@ asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
const u8 *src);
-asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
- __be64 *iv);
static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
unsigned int keylen)
@@ -32,272 +31,35 @@ static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
return cast5_setkey(&tfm->base, key, keylen);
}
-static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk,
- unsigned int nbytes)
-{
- return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
- walk, fpu_enabled, nbytes);
-}
-
-static inline void cast5_fpu_end(bool fpu_enabled)
-{
- return glue_fpu_end(fpu_enabled);
-}
-
-static int ecb_crypt(struct skcipher_request *req, bool enc)
-{
- bool fpu_enabled = false;
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- const unsigned int bsize = CAST5_BLOCK_SIZE;
- unsigned int nbytes;
- void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while ((nbytes = walk.nbytes)) {
- u8 *wsrc = walk.src.virt.addr;
- u8 *wdst = walk.dst.virt.addr;
-
- fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
-
- /* Process multi-block batch */
- if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
- fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
- do {
- fn(ctx, wdst, wsrc);
-
- wsrc += bsize * CAST5_PARALLEL_BLOCKS;
- wdst += bsize * CAST5_PARALLEL_BLOCKS;
- nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
- } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
-
- if (nbytes < bsize)
- goto done;
- }
-
- fn = (enc) ? __cast5_encrypt : __cast5_decrypt;
-
- /* Handle leftovers */
- do {
- fn(ctx, wdst, wsrc);
-
- wsrc += bsize;
- wdst += bsize;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- cast5_fpu_end(fpu_enabled);
- return err;
-}
-
static int ecb_encrypt(struct skcipher_request *req)
{
- return ecb_crypt(req, true);
+ ECB_WALK_START(req, CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS);
+ ECB_BLOCK(CAST5_PARALLEL_BLOCKS, cast5_ecb_enc_16way);
+ ECB_BLOCK(1, __cast5_encrypt);
+ ECB_WALK_END();
}
static int ecb_decrypt(struct skcipher_request *req)
{
- return ecb_crypt(req, false);
+ ECB_WALK_START(req, CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS);
+ ECB_BLOCK(CAST5_PARALLEL_BLOCKS, cast5_ecb_dec_16way);
+ ECB_BLOCK(1, __cast5_decrypt);
+ ECB_WALK_END();
}
static int cbc_encrypt(struct skcipher_request *req)
{
- const unsigned int bsize = CAST5_BLOCK_SIZE;
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while ((nbytes = walk.nbytes)) {
- u64 *src = (u64 *)walk.src.virt.addr;
- u64 *dst = (u64 *)walk.dst.virt.addr;
- u64 *iv = (u64 *)walk.iv;
-
- do {
- *dst = *src ^ *iv;
- __cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
- iv = dst;
- src++;
- dst++;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
- *(u64 *)walk.iv = *iv;
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- return err;
-}
-
-static unsigned int __cbc_decrypt(struct cast5_ctx *ctx,
- struct skcipher_walk *walk)
-{
- const unsigned int bsize = CAST5_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u64 *src = (u64 *)walk->src.virt.addr;
- u64 *dst = (u64 *)walk->dst.virt.addr;
- u64 last_iv;
-
- /* Start of the last block. */
- src += nbytes / bsize - 1;
- dst += nbytes / bsize - 1;
-
- last_iv = *src;
-
- /* Process multi-block batch */
- if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
- do {
- nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
- src -= CAST5_PARALLEL_BLOCKS - 1;
- dst -= CAST5_PARALLEL_BLOCKS - 1;
-
- cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- goto done;
-
- *dst ^= *(src - 1);
- src -= 1;
- dst -= 1;
- } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
- }
-
- /* Handle leftovers */
- for (;;) {
- __cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
-
- nbytes -= bsize;
- if (nbytes < bsize)
- break;
-
- *dst ^= *(src - 1);
- src -= 1;
- dst -= 1;
- }
-
-done:
- *dst ^= *(u64 *)walk->iv;
- *(u64 *)walk->iv = last_iv;
-
- return nbytes;
+ CBC_WALK_START(req, CAST5_BLOCK_SIZE, -1);
+ CBC_ENC_BLOCK(__cast5_encrypt);
+ CBC_WALK_END();
}
static int cbc_decrypt(struct skcipher_request *req)
{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
- bool fpu_enabled = false;
- struct skcipher_walk walk;
- unsigned int nbytes;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while ((nbytes = walk.nbytes)) {
- fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
- nbytes = __cbc_decrypt(ctx, &walk);
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- cast5_fpu_end(fpu_enabled);
- return err;
-}
-
-static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx)
-{
- u8 *ctrblk = walk->iv;
- u8 keystream[CAST5_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
-
- __cast5_encrypt(ctx, keystream, ctrblk);
- crypto_xor_cpy(dst, keystream, src, nbytes);
-
- crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
-}
-
-static unsigned int __ctr_crypt(struct skcipher_walk *walk,
- struct cast5_ctx *ctx)
-{
- const unsigned int bsize = CAST5_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- u64 *src = (u64 *)walk->src.virt.addr;
- u64 *dst = (u64 *)walk->dst.virt.addr;
-
- /* Process multi-block batch */
- if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
- do {
- cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
- (__be64 *)walk->iv);
-
- src += CAST5_PARALLEL_BLOCKS;
- dst += CAST5_PARALLEL_BLOCKS;
- nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
- } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- u64 ctrblk;
-
- if (dst != src)
- *dst = *src;
-
- ctrblk = *(u64 *)walk->iv;
- be64_add_cpu((__be64 *)walk->iv, 1);
-
- __cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
- *dst ^= ctrblk;
-
- src += 1;
- dst += 1;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
-done:
- return nbytes;
-}
-
-static int ctr_crypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
- bool fpu_enabled = false;
- struct skcipher_walk walk;
- unsigned int nbytes;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
- fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
- nbytes = __ctr_crypt(&walk, ctx);
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- cast5_fpu_end(fpu_enabled);
-
- if (walk.nbytes) {
- ctr_crypt_final(&walk, ctx);
- err = skcipher_walk_done(&walk, 0);
- }
-
- return err;
+ CBC_WALK_START(req, CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS);
+ CBC_DEC_BLOCK(CAST5_PARALLEL_BLOCKS, cast5_cbc_dec_16way);
+ CBC_DEC_BLOCK(1, __cast5_decrypt);
+ CBC_WALK_END();
}
static struct skcipher_alg cast5_algs[] = {
@@ -328,21 +90,6 @@ static struct skcipher_alg cast5_algs[] = {
.setkey = cast5_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
- }, {
- .base.cra_name = "__ctr(cast5)",
- .base.cra_driver_name = "__ctr-cast5-avx",
- .base.cra_priority = 200,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct cast5_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = CAST5_MIN_KEY_SIZE,
- .max_keysize = CAST5_MAX_KEY_SIZE,
- .ivsize = CAST5_BLOCK_SIZE,
- .chunksize = CAST5_BLOCK_SIZE,
- .setkey = cast5_setkey_skcipher,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
}
};
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 932a3ce32a88..fbddcecc3e3f 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -212,8 +212,6 @@
.section .rodata.cst16, "aM", @progbits, 16
.align 16
-.Lxts_gf128mul_and_shl1_mask:
- .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
.Lbswap_mask:
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.Lbswap128_mask:
@@ -412,85 +410,3 @@ SYM_FUNC_START(cast6_cbc_dec_8way)
FRAME_END
ret;
SYM_FUNC_END(cast6_cbc_dec_8way)
-
-SYM_FUNC_START(cast6_ctr_8way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: iv (little endian, 128bit)
- */
- FRAME_BEGIN
- pushq %r12;
- pushq %r15
-
- movq %rdi, CTX;
- movq %rsi, %r11;
- movq %rdx, %r12;
-
- load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
- RD2, RX, RKR, RKM);
-
- call __cast6_enc_blk8;
-
- store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
-
- popq %r15;
- popq %r12;
- FRAME_END
- ret;
-SYM_FUNC_END(cast6_ctr_8way)
-
-SYM_FUNC_START(cast6_xts_enc_8way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
- FRAME_BEGIN
- pushq %r15;
-
- movq %rdi, CTX
- movq %rsi, %r11;
-
- /* regs <= src, dst <= IVs, regs <= regs xor IVs */
- load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
- RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);
-
- call __cast6_enc_blk8;
-
- /* dst <= regs xor IVs(in dst) */
- store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
-
- popq %r15;
- FRAME_END
- ret;
-SYM_FUNC_END(cast6_xts_enc_8way)
-
-SYM_FUNC_START(cast6_xts_dec_8way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
- FRAME_BEGIN
- pushq %r15;
-
- movq %rdi, CTX
- movq %rsi, %r11;
-
- /* regs <= src, dst <= IVs, regs <= regs xor IVs */
- load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
- RX, RKR, RKM, .Lxts_gf128mul_and_shl1_mask);
-
- call __cast6_dec_blk8;
-
- /* dst <= regs xor IVs(in dst) */
- store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
-
- popq %r15;
- FRAME_END
- ret;
-SYM_FUNC_END(cast6_xts_dec_8way)
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 48e0f37796fa..7e2aea372349 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -15,8 +15,8 @@
#include <crypto/algapi.h>
#include <crypto/cast6.h>
#include <crypto/internal/simd.h>
-#include <crypto/xts.h>
-#include <asm/crypto/glue_helper.h>
+
+#include "ecb_cbc_helpers.h"
#define CAST6_PARALLEL_BLOCKS 8
@@ -24,13 +24,6 @@ asmlinkage void cast6_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src);
asmlinkage void cast6_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src);
asmlinkage void cast6_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src);
-asmlinkage void cast6_ctr_8way(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
-
-asmlinkage void cast6_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
-asmlinkage void cast6_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
static int cast6_setkey_skcipher(struct crypto_skcipher *tfm,
const u8 *key, unsigned int keylen)
@@ -38,172 +31,35 @@ static int cast6_setkey_skcipher(struct crypto_skcipher *tfm,
return cast6_setkey(&tfm->base, key, keylen);
}
-static void cast6_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
-{
- glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_encrypt);
-}
-
-static void cast6_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
-{
- glue_xts_crypt_128bit_one(ctx, dst, src, iv, __cast6_decrypt);
-}
-
-static void cast6_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
-{
- be128 ctrblk;
- u128 *dst = (u128 *)d;
- const u128 *src = (const u128 *)s;
-
- le128_to_be128(&ctrblk, iv);
- le128_inc(iv);
-
- __cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
- u128_xor(dst, src, (u128 *)&ctrblk);
-}
-
-static const struct common_glue_ctx cast6_enc = {
- .num_funcs = 2,
- .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAST6_PARALLEL_BLOCKS,
- .fn_u = { .ecb = cast6_ecb_enc_8way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = __cast6_encrypt }
- } }
-};
-
-static const struct common_glue_ctx cast6_ctr = {
- .num_funcs = 2,
- .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAST6_PARALLEL_BLOCKS,
- .fn_u = { .ctr = cast6_ctr_8way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ctr = cast6_crypt_ctr }
- } }
-};
-
-static const struct common_glue_ctx cast6_enc_xts = {
- .num_funcs = 2,
- .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAST6_PARALLEL_BLOCKS,
- .fn_u = { .xts = cast6_xts_enc_8way }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = cast6_xts_enc }
- } }
-};
-
-static const struct common_glue_ctx cast6_dec = {
- .num_funcs = 2,
- .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAST6_PARALLEL_BLOCKS,
- .fn_u = { .ecb = cast6_ecb_dec_8way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = __cast6_decrypt }
- } }
-};
-
-static const struct common_glue_ctx cast6_dec_cbc = {
- .num_funcs = 2,
- .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAST6_PARALLEL_BLOCKS,
- .fn_u = { .cbc = cast6_cbc_dec_8way }
- }, {
- .num_blocks = 1,
- .fn_u = { .cbc = __cast6_decrypt }
- } }
-};
-
-static const struct common_glue_ctx cast6_dec_xts = {
- .num_funcs = 2,
- .fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = CAST6_PARALLEL_BLOCKS,
- .fn_u = { .xts = cast6_xts_dec_8way }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = cast6_xts_dec }
- } }
-};
-
static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&cast6_enc, req);
+ ECB_WALK_START(req, CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS);
+ ECB_BLOCK(CAST6_PARALLEL_BLOCKS, cast6_ecb_enc_8way);
+ ECB_BLOCK(1, __cast6_encrypt);
+ ECB_WALK_END();
}
static int ecb_decrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&cast6_dec, req);
+ ECB_WALK_START(req, CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS);
+ ECB_BLOCK(CAST6_PARALLEL_BLOCKS, cast6_ecb_dec_8way);
+ ECB_BLOCK(1, __cast6_decrypt);
+ ECB_WALK_END();
}
static int cbc_encrypt(struct skcipher_request *req)
{
- return glue_cbc_encrypt_req_128bit(__cast6_encrypt, req);
+ CBC_WALK_START(req, CAST6_BLOCK_SIZE, -1);
+ CBC_ENC_BLOCK(__cast6_encrypt);
+ CBC_WALK_END();
}
static int cbc_decrypt(struct skcipher_request *req)
{
- return glue_cbc_decrypt_req_128bit(&cast6_dec_cbc, req);
-}
-
-static int ctr_crypt(struct skcipher_request *req)
-{
- return glue_ctr_req_128bit(&cast6_ctr, req);
-}
-
-struct cast6_xts_ctx {
- struct cast6_ctx tweak_ctx;
- struct cast6_ctx crypt_ctx;
-};
-
-static int xts_cast6_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- int err;
-
- err = xts_verify_key(tfm, key, keylen);
- if (err)
- return err;
-
- /* first half of xts-key is for crypt */
- err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2);
- if (err)
- return err;
-
- /* second half of xts-key is for tweak */
- return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
-}
-
-static int xts_encrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return glue_xts_req_128bit(&cast6_enc_xts, req, __cast6_encrypt,
- &ctx->tweak_ctx, &ctx->crypt_ctx, false);
-}
-
-static int xts_decrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct cast6_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return glue_xts_req_128bit(&cast6_dec_xts, req, __cast6_encrypt,
- &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+ CBC_WALK_START(req, CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS);
+ CBC_DEC_BLOCK(CAST6_PARALLEL_BLOCKS, cast6_cbc_dec_8way);
+ CBC_DEC_BLOCK(1, __cast6_decrypt);
+ CBC_WALK_END();
}
static struct skcipher_alg cast6_algs[] = {
@@ -234,35 +90,6 @@ static struct skcipher_alg cast6_algs[] = {
.setkey = cast6_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
- }, {
- .base.cra_name = "__ctr(cast6)",
- .base.cra_driver_name = "__ctr-cast6-avx",
- .base.cra_priority = 200,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct cast6_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = CAST6_MIN_KEY_SIZE,
- .max_keysize = CAST6_MAX_KEY_SIZE,
- .ivsize = CAST6_BLOCK_SIZE,
- .chunksize = CAST6_BLOCK_SIZE,
- .setkey = cast6_setkey_skcipher,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- }, {
- .base.cra_name = "__xts(cast6)",
- .base.cra_driver_name = "__xts-cast6-avx",
- .base.cra_priority = 200,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
- .base.cra_blocksize = CAST6_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct cast6_xts_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = 2 * CAST6_MIN_KEY_SIZE,
- .max_keysize = 2 * CAST6_MAX_KEY_SIZE,
- .ivsize = CAST6_BLOCK_SIZE,
- .setkey = xts_cast6_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
},
};
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index 89830e531350..e7cb68a3db3b 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -6,8 +6,6 @@
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- * CTR part based on code (crypto/ctr.c) by:
- * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*/
#include <crypto/algapi.h>
@@ -253,94 +251,6 @@ static int cbc_decrypt(struct skcipher_request *req)
return err;
}
-static void ctr_crypt_final(struct des3_ede_x86_ctx *ctx,
- struct skcipher_walk *walk)
-{
- u8 *ctrblk = walk->iv;
- u8 keystream[DES3_EDE_BLOCK_SIZE];
- u8 *src = walk->src.virt.addr;
- u8 *dst = walk->dst.virt.addr;
- unsigned int nbytes = walk->nbytes;
-
- des3_ede_enc_blk(ctx, keystream, ctrblk);
- crypto_xor_cpy(dst, keystream, src, nbytes);
-
- crypto_inc(ctrblk, DES3_EDE_BLOCK_SIZE);
-}
-
-static unsigned int __ctr_crypt(struct des3_ede_x86_ctx *ctx,
- struct skcipher_walk *walk)
-{
- unsigned int bsize = DES3_EDE_BLOCK_SIZE;
- unsigned int nbytes = walk->nbytes;
- __be64 *src = (__be64 *)walk->src.virt.addr;
- __be64 *dst = (__be64 *)walk->dst.virt.addr;
- u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
- __be64 ctrblocks[3];
-
- /* Process four block batch */
- if (nbytes >= bsize * 3) {
- do {
- /* create ctrblks for parallel encrypt */
- ctrblocks[0] = cpu_to_be64(ctrblk++);
- ctrblocks[1] = cpu_to_be64(ctrblk++);
- ctrblocks[2] = cpu_to_be64(ctrblk++);
-
- des3_ede_enc_blk_3way(ctx, (u8 *)ctrblocks,
- (u8 *)ctrblocks);
-
- dst[0] = src[0] ^ ctrblocks[0];
- dst[1] = src[1] ^ ctrblocks[1];
- dst[2] = src[2] ^ ctrblocks[2];
-
- src += 3;
- dst += 3;
- } while ((nbytes -= bsize * 3) >= bsize * 3);
-
- if (nbytes < bsize)
- goto done;
- }
-
- /* Handle leftovers */
- do {
- ctrblocks[0] = cpu_to_be64(ctrblk++);
-
- des3_ede_enc_blk(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
-
- dst[0] = src[0] ^ ctrblocks[0];
-
- src += 1;
- dst += 1;
- } while ((nbytes -= bsize) >= bsize);
-
-done:
- *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
- return nbytes;
-}
-
-static int ctr_crypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- unsigned int nbytes;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while ((nbytes = walk.nbytes) >= DES3_EDE_BLOCK_SIZE) {
- nbytes = __ctr_crypt(ctx, &walk);
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- if (nbytes) {
- ctr_crypt_final(ctx, &walk);
- err = skcipher_walk_done(&walk, 0);
- }
-
- return err;
-}
-
static int des3_ede_x86_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
@@ -428,20 +338,6 @@ static struct skcipher_alg des3_ede_skciphers[] = {
.setkey = des3_ede_x86_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
- }, {
- .base.cra_name = "ctr(des3_ede)",
- .base.cra_driver_name = "ctr-des3_ede-asm",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct des3_ede_x86_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = DES3_EDE_KEY_SIZE,
- .max_keysize = DES3_EDE_KEY_SIZE,
- .ivsize = DES3_EDE_BLOCK_SIZE,
- .chunksize = DES3_EDE_BLOCK_SIZE,
- .setkey = des3_ede_x86_setkey_skcipher,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
}
};
diff --git a/arch/x86/crypto/ecb_cbc_helpers.h b/arch/x86/crypto/ecb_cbc_helpers.h
new file mode 100644
index 000000000000..eaa15c7b29d6
--- /dev/null
+++ b/arch/x86/crypto/ecb_cbc_helpers.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _CRYPTO_ECB_CBC_HELPER_H
+#define _CRYPTO_ECB_CBC_HELPER_H
+
+#include <crypto/internal/skcipher.h>
+#include <asm/fpu/api.h>
+
+/*
+ * Mode helpers to instantiate parameterized skcipher ECB/CBC modes without
+ * having to rely on indirect calls and retpolines.
+ */
+
+#define ECB_WALK_START(req, bsize, fpu_blocks) do { \
+ void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); \
+ const int __bsize = (bsize); \
+ struct skcipher_walk walk; \
+ int err = skcipher_walk_virt(&walk, (req), false); \
+ while (walk.nbytes > 0) { \
+ unsigned int nbytes = walk.nbytes; \
+ bool do_fpu = (fpu_blocks) != -1 && \
+ nbytes >= (fpu_blocks) * __bsize; \
+ const u8 *src = walk.src.virt.addr; \
+ u8 *dst = walk.dst.virt.addr; \
+ u8 __maybe_unused buf[(bsize)]; \
+ if (do_fpu) kernel_fpu_begin()
+
+#define CBC_WALK_START(req, bsize, fpu_blocks) \
+ ECB_WALK_START(req, bsize, fpu_blocks)
+
+#define ECB_WALK_ADVANCE(blocks) do { \
+ dst += (blocks) * __bsize; \
+ src += (blocks) * __bsize; \
+ nbytes -= (blocks) * __bsize; \
+} while (0)
+
+#define ECB_BLOCK(blocks, func) do { \
+ while (nbytes >= (blocks) * __bsize) { \
+ (func)(ctx, dst, src); \
+ ECB_WALK_ADVANCE(blocks); \
+ } \
+} while (0)
+
+#define CBC_ENC_BLOCK(func) do { \
+ const u8 *__iv = walk.iv; \
+ while (nbytes >= __bsize) { \
+ crypto_xor_cpy(dst, src, __iv, __bsize); \
+ (func)(ctx, dst, dst); \
+ __iv = dst; \
+ ECB_WALK_ADVANCE(1); \
+ } \
+ memcpy(walk.iv, __iv, __bsize); \
+} while (0)
+
+#define CBC_DEC_BLOCK(blocks, func) do { \
+ while (nbytes >= (blocks) * __bsize) { \
+ const u8 *__iv = src + ((blocks) - 1) * __bsize; \
+ if (dst == src) \
+ __iv = memcpy(buf, __iv, __bsize); \
+ (func)(ctx, dst, src); \
+ crypto_xor(dst, walk.iv, __bsize); \
+ memcpy(walk.iv, __iv, __bsize); \
+ ECB_WALK_ADVANCE(blocks); \
+ } \
+} while (0)
+
+#define ECB_WALK_END() \
+ if (do_fpu) kernel_fpu_end(); \
+ err = skcipher_walk_done(&walk, nbytes); \
+ } \
+ return err; \
+} while (0)
+
+#define CBC_WALK_END() ECB_WALK_END()
+
+#endif
diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S
index d08fc575ef7f..3da385271227 100644
--- a/arch/x86/crypto/glue_helper-asm-avx.S
+++ b/arch/x86/crypto/glue_helper-asm-avx.S
@@ -34,107 +34,3 @@
vpxor (5*16)(src), x6, x6; \
vpxor (6*16)(src), x7, x7; \
store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
-
-#define inc_le128(x, minus_one, tmp) \
- vpcmpeqq minus_one, x, tmp; \
- vpsubq minus_one, x, x; \
- vpslldq $8, tmp, tmp; \
- vpsubq tmp, x, x;
-
-#define load_ctr_8way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2) \
- vpcmpeqd t0, t0, t0; \
- vpsrldq $8, t0, t0; /* low: -1, high: 0 */ \
- vmovdqa bswap, t1; \
- \
- /* load IV and byteswap */ \
- vmovdqu (iv), x7; \
- vpshufb t1, x7, x0; \
- \
- /* construct IVs */ \
- inc_le128(x7, t0, t2); \
- vpshufb t1, x7, x1; \
- inc_le128(x7, t0, t2); \
- vpshufb t1, x7, x2; \
- inc_le128(x7, t0, t2); \
- vpshufb t1, x7, x3; \
- inc_le128(x7, t0, t2); \
- vpshufb t1, x7, x4; \
- inc_le128(x7, t0, t2); \
- vpshufb t1, x7, x5; \
- inc_le128(x7, t0, t2); \
- vpshufb t1, x7, x6; \
- inc_le128(x7, t0, t2); \
- vmovdqa x7, t2; \
- vpshufb t1, x7, x7; \
- inc_le128(t2, t0, t1); \
- vmovdqu t2, (iv);
-
-#define store_ctr_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
- vpxor (0*16)(src), x0, x0; \
- vpxor (1*16)(src), x1, x1; \
- vpxor (2*16)(src), x2, x2; \
- vpxor (3*16)(src), x3, x3; \
- vpxor (4*16)(src), x4, x4; \
- vpxor (5*16)(src), x5, x5; \
- vpxor (6*16)(src), x6, x6; \
- vpxor (7*16)(src), x7, x7; \
- store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
-
-#define gf128mul_x_ble(iv, mask, tmp) \
- vpsrad $31, iv, tmp; \
- vpaddq iv, iv, iv; \
- vpshufd $0x13, tmp, tmp; \
- vpand mask, tmp, tmp; \
- vpxor tmp, iv, iv;
-
-#define load_xts_8way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, t0, \
- t1, xts_gf128mul_and_shl1_mask) \
- vmovdqa xts_gf128mul_and_shl1_mask, t0; \
- \
- /* load IV */ \
- vmovdqu (iv), tiv; \
- vpxor (0*16)(src), tiv, x0; \
- vmovdqu tiv, (0*16)(dst); \
- \
- /* construct and store IVs, also xor with source */ \
- gf128mul_x_ble(tiv, t0, t1); \
- vpxor (1*16)(src), tiv, x1; \
- vmovdqu tiv, (1*16)(dst); \
- \
- gf128mul_x_ble(tiv, t0, t1); \
- vpxor (2*16)(src), tiv, x2; \
- vmovdqu tiv, (2*16)(dst); \
- \
- gf128mul_x_ble(tiv, t0, t1); \
- vpxor (3*16)(src), tiv, x3; \
- vmovdqu tiv, (3*16)(dst); \
- \
- gf128mul_x_ble(tiv, t0, t1); \
- vpxor (4*16)(src), tiv, x4; \
- vmovdqu tiv, (4*16)(dst); \
- \
- gf128mul_x_ble(tiv, t0, t1); \
- vpxor (5*16)(src), tiv, x5; \
- vmovdqu tiv, (5*16)(dst); \
- \
- gf128mul_x_ble(tiv, t0, t1); \
- vpxor (6*16)(src), tiv, x6; \
- vmovdqu tiv, (6*16)(dst); \
- \
- gf128mul_x_ble(tiv, t0, t1); \
- vpxor (7*16)(src), tiv, x7; \
- vmovdqu tiv, (7*16)(dst); \
- \
- gf128mul_x_ble(tiv, t0, t1); \
- vmovdqu tiv, (iv);
-
-#define store_xts_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
- vpxor (0*16)(dst), x0, x0; \
- vpxor (1*16)(dst), x1, x1; \
- vpxor (2*16)(dst), x2, x2; \
- vpxor (3*16)(dst), x3, x3; \
- vpxor (4*16)(dst), x4, x4; \
- vpxor (5*16)(dst), x5, x5; \
- vpxor (6*16)(dst), x6, x6; \
- vpxor (7*16)(dst), x7, x7; \
- store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
diff --git a/arch/x86/crypto/glue_helper-asm-avx2.S b/arch/x86/crypto/glue_helper-asm-avx2.S
index d84508c85c13..c77e9049431f 100644
--- a/arch/x86/crypto/glue_helper-asm-avx2.S
+++ b/arch/x86/crypto/glue_helper-asm-avx2.S
@@ -37,139 +37,3 @@
vpxor (5*32+16)(src), x6, x6; \
vpxor (6*32+16)(src), x7, x7; \
store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
-
-#define inc_le128(x, minus_one, tmp) \
- vpcmpeqq minus_one, x, tmp; \
- vpsubq minus_one, x, x; \
- vpslldq $8, tmp, tmp; \
- vpsubq tmp, x, x;
-
-#define add2_le128(x, minus_one, minus_two, tmp1, tmp2) \
- vpcmpeqq minus_one, x, tmp1; \
- vpcmpeqq minus_two, x, tmp2; \
- vpsubq minus_two, x, x; \
- vpor tmp2, tmp1, tmp1; \
- vpslldq $8, tmp1, tmp1; \
- vpsubq tmp1, x, x;
-
-#define load_ctr_16way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t0x, t1, \
- t1x, t2, t2x, t3, t3x, t4, t5) \
- vpcmpeqd t0, t0, t0; \
- vpsrldq $8, t0, t0; /* ab: -1:0 ; cd: -1:0 */ \
- vpaddq t0, t0, t4; /* ab: -2:0 ; cd: -2:0 */\
- \
- /* load IV and byteswap */ \
- vmovdqu (iv), t2x; \
- vmovdqa t2x, t3x; \
- inc_le128(t2x, t0x, t1x); \
- vbroadcasti128 bswap, t1; \
- vinserti128 $1, t2x, t3, t2; /* ab: le0 ; cd: le1 */ \
- vpshufb t1, t2, x0; \
- \
- /* construct IVs */ \
- add2_le128(t2, t0, t4, t3, t5); /* ab: le2 ; cd: le3 */ \
- vpshufb t1, t2, x1; \
- add2_le128(t2, t0, t4, t3, t5); \
- vpshufb t1, t2, x2; \
- add2_le128(t2, t0, t4, t3, t5); \
- vpshufb t1, t2, x3; \
- add2_le128(t2, t0, t4, t3, t5); \
- vpshufb t1, t2, x4; \
- add2_le128(t2, t0, t4, t3, t5); \
- vpshufb t1, t2, x5; \
- add2_le128(t2, t0, t4, t3, t5); \
- vpshufb t1, t2, x6; \
- add2_le128(t2, t0, t4, t3, t5); \
- vpshufb t1, t2, x7; \
- vextracti128 $1, t2, t2x; \
- inc_le128(t2x, t0x, t3x); \
- vmovdqu t2x, (iv);
-
-#define store_ctr_16way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
- vpxor (0*32)(src), x0, x0; \
- vpxor (1*32)(src), x1, x1; \
- vpxor (2*32)(src), x2, x2; \
- vpxor (3*32)(src), x3, x3; \
- vpxor (4*32)(src), x4, x4; \
- vpxor (5*32)(src), x5, x5; \
- vpxor (6*32)(src), x6, x6; \
- vpxor (7*32)(src), x7, x7; \
- store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
-
-#define gf128mul_x_ble(iv, mask, tmp) \
- vpsrad $31, iv, tmp; \
- vpaddq iv, iv, iv; \
- vpshufd $0x13, tmp, tmp; \
- vpand mask, tmp, tmp; \
- vpxor tmp, iv, iv;
-
-#define gf128mul_x2_ble(iv, mask1, mask2, tmp0, tmp1) \
- vpsrad $31, iv, tmp0; \
- vpaddq iv, iv, tmp1; \
- vpsllq $2, iv, iv; \
- vpshufd $0x13, tmp0, tmp0; \
- vpsrad $31, tmp1, tmp1; \
- vpand mask2, tmp0, tmp0; \
- vpshufd $0x13, tmp1, tmp1; \
- vpxor tmp0, iv, iv; \
- vpand mask1, tmp1, tmp1; \
- vpxor tmp1, iv, iv;
-
-#define load_xts_16way(iv, src, dst, x0, x1, x2, x3, x4, x5, x6, x7, tiv, \
- tivx, t0, t0x, t1, t1x, t2, t2x, t3, \
- xts_gf128mul_and_shl1_mask_0, \
- xts_gf128mul_and_shl1_mask_1) \
- vbroadcasti128 xts_gf128mul_and_shl1_mask_0, t1; \
- \
- /* load IV and construct second IV */ \
- vmovdqu (iv), tivx; \
- vmovdqa tivx, t0x; \
- gf128mul_x_ble(tivx, t1x, t2x); \
- vbroadcasti128 xts_gf128mul_and_shl1_mask_1, t2; \
- vinserti128 $1, tivx, t0, tiv; \
- vpxor (0*32)(src), tiv, x0; \
- vmovdqu tiv, (0*32)(dst); \
- \
- /* construct and store IVs, also xor with source */ \
- gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
- vpxor (1*32)(src), tiv, x1; \
- vmovdqu tiv, (1*32)(dst); \
- \
- gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
- vpxor (2*32)(src), tiv, x2; \
- vmovdqu tiv, (2*32)(dst); \
- \
- gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
- vpxor (3*32)(src), tiv, x3; \
- vmovdqu tiv, (3*32)(dst); \
- \
- gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
- vpxor (4*32)(src), tiv, x4; \
- vmovdqu tiv, (4*32)(dst); \
- \
- gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
- vpxor (5*32)(src), tiv, x5; \
- vmovdqu tiv, (5*32)(dst); \
- \
- gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
- vpxor (6*32)(src), tiv, x6; \
- vmovdqu tiv, (6*32)(dst); \
- \
- gf128mul_x2_ble(tiv, t1, t2, t0, t3); \
- vpxor (7*32)(src), tiv, x7; \
- vmovdqu tiv, (7*32)(dst); \
- \
- vextracti128 $1, tiv, tivx; \
- gf128mul_x_ble(tivx, t1x, t2x); \
- vmovdqu tivx, (iv);
-
-#define store_xts_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
- vpxor (0*32)(dst), x0, x0; \
- vpxor (1*32)(dst), x1, x1; \
- vpxor (2*32)(dst), x2, x2; \
- vpxor (3*32)(dst), x3, x3; \
- vpxor (4*32)(dst), x4, x4; \
- vpxor (5*32)(dst), x5, x5; \
- vpxor (6*32)(dst), x6, x6; \
- vpxor (7*32)(dst), x7, x7; \
- store_16way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
deleted file mode 100644
index d3d91a0abf88..000000000000
--- a/arch/x86/crypto/glue_helper.c
+++ /dev/null
@@ -1,381 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Shared glue code for 128bit block ciphers
- *
- * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
- *
- * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
- * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- * CTR part based on code (crypto/ctr.c) by:
- * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
- */
-
-#include <linux/module.h>
-#include <crypto/b128ops.h>
-#include <crypto/gf128mul.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/scatterwalk.h>
-#include <crypto/xts.h>
-#include <asm/crypto/glue_helper.h>
-
-int glue_ecb_req_128bit(const struct common_glue_ctx *gctx,
- struct skcipher_request *req)
-{
- void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
- const unsigned int bsize = 128 / 8;
- struct skcipher_walk walk;
- bool fpu_enabled = false;
- unsigned int nbytes;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while ((nbytes = walk.nbytes)) {
- const u8 *src = walk.src.virt.addr;
- u8 *dst = walk.dst.virt.addr;
- unsigned int func_bytes;
- unsigned int i;
-
- fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- &walk, fpu_enabled, nbytes);
- for (i = 0; i < gctx->num_funcs; i++) {
- func_bytes = bsize * gctx->funcs[i].num_blocks;
-
- if (nbytes < func_bytes)
- continue;
-
- /* Process multi-block batch */
- do {
- gctx->funcs[i].fn_u.ecb(ctx, dst, src);
- src += func_bytes;
- dst += func_bytes;
- nbytes -= func_bytes;
- } while (nbytes >= func_bytes);
-
- if (nbytes < bsize)
- break;
- }
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- glue_fpu_end(fpu_enabled);
- return err;
-}
-EXPORT_SYMBOL_GPL(glue_ecb_req_128bit);
-
-int glue_cbc_encrypt_req_128bit(const common_glue_func_t fn,
- struct skcipher_request *req)
-{
- void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
- const unsigned int bsize = 128 / 8;
- struct skcipher_walk walk;
- unsigned int nbytes;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while ((nbytes = walk.nbytes)) {
- const u128 *src = (u128 *)walk.src.virt.addr;
- u128 *dst = (u128 *)walk.dst.virt.addr;
- u128 *iv = (u128 *)walk.iv;
-
- do {
- u128_xor(dst, src, iv);
- fn(ctx, (u8 *)dst, (u8 *)dst);
- iv = dst;
- src++;
- dst++;
- nbytes -= bsize;
- } while (nbytes >= bsize);
-
- *(u128 *)walk.iv = *iv;
- err = skcipher_walk_done(&walk, nbytes);
- }
- return err;
-}
-EXPORT_SYMBOL_GPL(glue_cbc_encrypt_req_128bit);
-
-int glue_cbc_decrypt_req_128bit(const struct common_glue_ctx *gctx,
- struct skcipher_request *req)
-{
- void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
- const unsigned int bsize = 128 / 8;
- struct skcipher_walk walk;
- bool fpu_enabled = false;
- unsigned int nbytes;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while ((nbytes = walk.nbytes)) {
- const u128 *src = walk.src.virt.addr;
- u128 *dst = walk.dst.virt.addr;
- unsigned int func_bytes, num_blocks;
- unsigned int i;
- u128 last_iv;
-
- fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- &walk, fpu_enabled, nbytes);
- /* Start of the last block. */
- src += nbytes / bsize - 1;
- dst += nbytes / bsize - 1;
-
- last_iv = *src;
-
- for (i = 0; i < gctx->num_funcs; i++) {
- num_blocks = gctx->funcs[i].num_blocks;
- func_bytes = bsize * num_blocks;
-
- if (nbytes < func_bytes)
- continue;
-
- /* Process multi-block batch */
- do {
- src -= num_blocks - 1;
- dst -= num_blocks - 1;
-
- gctx->funcs[i].fn_u.cbc(ctx, (u8 *)dst,
- (const u8 *)src);
-
- nbytes -= func_bytes;
- if (nbytes < bsize)
- goto done;
-
- u128_xor(dst, dst, --src);
- dst--;
- } while (nbytes >= func_bytes);
- }
-done:
- u128_xor(dst, dst, (u128 *)walk.iv);
- *(u128 *)walk.iv = last_iv;
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- glue_fpu_end(fpu_enabled);
- return err;
-}
-EXPORT_SYMBOL_GPL(glue_cbc_decrypt_req_128bit);
-
-int glue_ctr_req_128bit(const struct common_glue_ctx *gctx,
- struct skcipher_request *req)
-{
- void *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
- const unsigned int bsize = 128 / 8;
- struct skcipher_walk walk;
- bool fpu_enabled = false;
- unsigned int nbytes;
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- while ((nbytes = walk.nbytes) >= bsize) {
- const u128 *src = walk.src.virt.addr;
- u128 *dst = walk.dst.virt.addr;
- unsigned int func_bytes, num_blocks;
- unsigned int i;
- le128 ctrblk;
-
- fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- &walk, fpu_enabled, nbytes);
-
- be128_to_le128(&ctrblk, (be128 *)walk.iv);
-
- for (i = 0; i < gctx->num_funcs; i++) {
- num_blocks = gctx->funcs[i].num_blocks;
- func_bytes = bsize * num_blocks;
-
- if (nbytes < func_bytes)
- continue;
-
- /* Process multi-block batch */
- do {
- gctx->funcs[i].fn_u.ctr(ctx, (u8 *)dst,
- (const u8 *)src,
- &ctrblk);
- src += num_blocks;
- dst += num_blocks;
- nbytes -= func_bytes;
- } while (nbytes >= func_bytes);
-
- if (nbytes < bsize)
- break;
- }
-
- le128_to_be128((be128 *)walk.iv, &ctrblk);
- err = skcipher_walk_done(&walk, nbytes);
- }
-
- glue_fpu_end(fpu_enabled);
-
- if (nbytes) {
- le128 ctrblk;
- u128 tmp;
-
- be128_to_le128(&ctrblk, (be128 *)walk.iv);
- memcpy(&tmp, walk.src.virt.addr, nbytes);
- gctx->funcs[gctx->num_funcs - 1].fn_u.ctr(ctx, (u8 *)&tmp,
- (const u8 *)&tmp,
- &ctrblk);
- memcpy(walk.dst.virt.addr, &tmp, nbytes);
- le128_to_be128((be128 *)walk.iv, &ctrblk);
-
- err = skcipher_walk_done(&walk, 0);
- }
-
- return err;
-}
-EXPORT_SYMBOL_GPL(glue_ctr_req_128bit);
-
-static unsigned int __glue_xts_req_128bit(const struct common_glue_ctx *gctx,
- void *ctx,
- struct skcipher_walk *walk)
-{
- const unsigned int bsize = 128 / 8;
- unsigned int nbytes = walk->nbytes;
- u128 *src = walk->src.virt.addr;
- u128 *dst = walk->dst.virt.addr;
- unsigned int num_blocks, func_bytes;
- unsigned int i;
-
- /* Process multi-block batch */
- for (i = 0; i < gctx->num_funcs; i++) {
- num_blocks = gctx->funcs[i].num_blocks;
- func_bytes = bsize * num_blocks;
-
- if (nbytes >= func_bytes) {
- do {
- gctx->funcs[i].fn_u.xts(ctx, (u8 *)dst,
- (const u8 *)src,
- walk->iv);
-
- src += num_blocks;
- dst += num_blocks;
- nbytes -= func_bytes;
- } while (nbytes >= func_bytes);
-
- if (nbytes < bsize)
- goto done;
- }
- }
-
-done:
- return nbytes;
-}
-
-int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
- struct skcipher_request *req,
- common_glue_func_t tweak_fn, void *tweak_ctx,
- void *crypt_ctx, bool decrypt)
-{
- const bool cts = (req->cryptlen % XTS_BLOCK_SIZE);
- const unsigned int bsize = 128 / 8;
- struct skcipher_request subreq;
- struct skcipher_walk walk;
- bool fpu_enabled = false;
- unsigned int nbytes, tail;
- int err;
-
- if (req->cryptlen < XTS_BLOCK_SIZE)
- return -EINVAL;
-
- if (unlikely(cts)) {
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-
- tail = req->cryptlen % XTS_BLOCK_SIZE + XTS_BLOCK_SIZE;
-
- skcipher_request_set_tfm(&subreq, tfm);
- skcipher_request_set_callback(&subreq,
- crypto_skcipher_get_flags(tfm),
- NULL, NULL);
- skcipher_request_set_crypt(&subreq, req->src, req->dst,
- req->cryptlen - tail, req->iv);
- req = &subreq;
- }
-
- err = skcipher_walk_virt(&walk, req, false);
- nbytes = walk.nbytes;
- if (err)
- return err;
-
- /* set minimum length to bsize, for tweak_fn */
- fpu_enabled = glue_fpu_begin(bsize, gctx->fpu_blocks_limit,
- &walk, fpu_enabled,
- nbytes < bsize ? bsize : nbytes);
-
- /* calculate first value of T */
- tweak_fn(tweak_ctx, walk.iv, walk.iv);
-
- while (nbytes) {
- nbytes = __glue_xts_req_128bit(gctx, crypt_ctx, &walk);
-
- err = skcipher_walk_done(&walk, nbytes);
- nbytes = walk.nbytes;
- }
-
- if (unlikely(cts)) {
- u8 *next_tweak, *final_tweak = req->iv;
- struct scatterlist *src, *dst;
- struct scatterlist s[2], d[2];
- le128 b[2];
-
- dst = src = scatterwalk_ffwd(s, req->src, req->cryptlen);
- if (req->dst != req->src)
- dst = scatterwalk_ffwd(d, req->dst, req->cryptlen);
-
- if (decrypt) {
- next_tweak = memcpy(b, req->iv, XTS_BLOCK_SIZE);
- gf128mul_x_ble(b, b);
- } else {
- next_tweak = req->iv;
- }
-
- skcipher_request_set_crypt(&subreq, src, dst, XTS_BLOCK_SIZE,
- next_tweak);
-
- err = skcipher_walk_virt(&walk, req, false) ?:
- skcipher_walk_done(&walk,
- __glue_xts_req_128bit(gctx, crypt_ctx, &walk));
- if (err)
- goto out;
-
- scatterwalk_map_and_copy(b, dst, 0, XTS_BLOCK_SIZE, 0);
- memcpy(b + 1, b, tail - XTS_BLOCK_SIZE);
- scatterwalk_map_and_copy(b, src, XTS_BLOCK_SIZE,
- tail - XTS_BLOCK_SIZE, 0);
- scatterwalk_map_and_copy(b, dst, 0, tail, 1);
-
- skcipher_request_set_crypt(&subreq, dst, dst, XTS_BLOCK_SIZE,
- final_tweak);
-
- err = skcipher_walk_virt(&walk, req, false) ?:
- skcipher_walk_done(&walk,
- __glue_xts_req_128bit(gctx, crypt_ctx, &walk));
- }
-
-out:
- glue_fpu_end(fpu_enabled);
-
- return err;
-}
-EXPORT_SYMBOL_GPL(glue_xts_req_128bit);
-
-void glue_xts_crypt_128bit_one(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv, common_glue_func_t fn)
-{
- le128 ivblk = *iv;
-
- /* generate next IV */
- gf128mul_x_ble(iv, &ivblk);
-
- /* CC <- T xor C */
- u128_xor((u128 *)dst, (const u128 *)src, (u128 *)&ivblk);
-
- /* PP <- D(Key2,CC) */
- fn(ctx, dst, dst);
-
- /* P <- T xor PP */
- u128_xor((u128 *)dst, (u128 *)dst, (u128 *)&ivblk);
-}
-EXPORT_SYMBOL_GPL(glue_xts_crypt_128bit_one);
-
-MODULE_LICENSE("GPL");
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index ba9e4c1e7f5c..b7ee24df7fba 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -18,10 +18,6 @@
.align 16
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
-.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16
-.align 16
-.Lxts_gf128mul_and_shl1_mask:
- .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
.text
@@ -715,67 +711,3 @@ SYM_FUNC_START(serpent_cbc_dec_8way_avx)
FRAME_END
ret;
SYM_FUNC_END(serpent_cbc_dec_8way_avx)
-
-SYM_FUNC_START(serpent_ctr_8way_avx)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: iv (little endian, 128bit)
- */
- FRAME_BEGIN
-
- load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
- RD2, RK0, RK1, RK2);
-
- call __serpent_enc_blk8_avx;
-
- store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
-
- FRAME_END
- ret;
-SYM_FUNC_END(serpent_ctr_8way_avx)
-
-SYM_FUNC_START(serpent_xts_enc_8way_avx)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
- FRAME_BEGIN
-
- /* regs <= src, dst <= IVs, regs <= regs xor IVs */
- load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
- RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask);
-
- call __serpent_enc_blk8_avx;
-
- /* dst <= regs xor IVs(in dst) */
- store_xts_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
-
- FRAME_END
- ret;
-SYM_FUNC_END(serpent_xts_enc_8way_avx)
-
-SYM_FUNC_START(serpent_xts_dec_8way_avx)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
- FRAME_BEGIN
-
- /* regs <= src, dst <= IVs, regs <= regs xor IVs */
- load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
- RK0, RK1, RK2, .Lxts_gf128mul_and_shl1_mask);
-
- call __serpent_dec_blk8_avx;
-
- /* dst <= regs xor IVs(in dst) */
- store_xts_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
-
- FRAME_END
- ret;
-SYM_FUNC_END(serpent_xts_dec_8way_avx)
diff --git a/arch/x86/crypto/serpent-avx.h b/arch/x86/crypto/serpent-avx.h
new file mode 100644
index 000000000000..23f3361a0e72
--- /dev/null
+++ b/arch/x86/crypto/serpent-avx.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_X86_SERPENT_AVX_H
+#define ASM_X86_SERPENT_AVX_H
+
+#include <crypto/b128ops.h>
+#include <crypto/serpent.h>
+#include <linux/types.h>
+
+struct crypto_skcipher;
+
+#define SERPENT_PARALLEL_BLOCKS 8
+
+asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void serpent_ecb_dec_8way_avx(const void *ctx, u8 *dst,
+ const u8 *src);
+
+asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst,
+ const u8 *src);
+
+#endif
diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S
index c9648aeae705..9161b6e441f3 100644
--- a/arch/x86/crypto/serpent-avx2-asm_64.S
+++ b/arch/x86/crypto/serpent-avx2-asm_64.S
@@ -20,16 +20,6 @@
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
-.section .rodata.cst16.xts_gf128mul_and_shl1_mask_0, "aM", @progbits, 16
-.align 16
-.Lxts_gf128mul_and_shl1_mask_0:
- .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
-
-.section .rodata.cst16.xts_gf128mul_and_shl1_mask_1, "aM", @progbits, 16
-.align 16
-.Lxts_gf128mul_and_shl1_mask_1:
- .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
-
.text
#define CTX %rdi
@@ -734,80 +724,3 @@ SYM_FUNC_START(serpent_cbc_dec_16way)
FRAME_END
ret;
SYM_FUNC_END(serpent_cbc_dec_16way)
-
-SYM_FUNC_START(serpent_ctr_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (16 blocks)
- * %rdx: src (16 blocks)
- * %rcx: iv (little endian, 128bit)
- */
- FRAME_BEGIN
-
- vzeroupper;
-
- load_ctr_16way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
- RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT,
- tp);
-
- call __serpent_enc_blk16;
-
- store_ctr_16way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
-
- vzeroupper;
-
- FRAME_END
- ret;
-SYM_FUNC_END(serpent_ctr_16way)
-
-SYM_FUNC_START(serpent_xts_enc_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (16 blocks)
- * %rdx: src (16 blocks)
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
- FRAME_BEGIN
-
- vzeroupper;
-
- load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
- RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT,
- .Lxts_gf128mul_and_shl1_mask_0,
- .Lxts_gf128mul_and_shl1_mask_1);
-
- call __serpent_enc_blk16;
-
- store_xts_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
-
- vzeroupper;
-
- FRAME_END
- ret;
-SYM_FUNC_END(serpent_xts_enc_16way)
-
-SYM_FUNC_START(serpent_xts_dec_16way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (16 blocks)
- * %rdx: src (16 blocks)
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
- FRAME_BEGIN
-
- vzeroupper;
-
- load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
- RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT,
- .Lxts_gf128mul_and_shl1_mask_0,
- .Lxts_gf128mul_and_shl1_mask_1);
-
- call __serpent_dec_blk16;
-
- store_xts_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
-
- vzeroupper;
-
- FRAME_END
- ret;
-SYM_FUNC_END(serpent_xts_dec_16way)
diff --git a/arch/x86/crypto/serpent-sse2.h b/arch/x86/crypto/serpent-sse2.h
new file mode 100644
index 000000000000..860ca248914b
--- /dev/null
+++ b/arch/x86/crypto/serpent-sse2.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_X86_SERPENT_SSE2_H
+#define ASM_X86_SERPENT_SSE2_H
+
+#include <linux/crypto.h>
+#include <crypto/serpent.h>
+
+#ifdef CONFIG_X86_32
+
+#define SERPENT_PARALLEL_BLOCKS 4
+
+asmlinkage void __serpent_enc_blk_4way(const struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src, bool xor);
+asmlinkage void serpent_dec_blk_4way(const struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src);
+
+static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src)
+{
+ __serpent_enc_blk_4way(ctx, dst, src, false);
+}
+
+static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx,
+ u8 *dst, const u8 *src)
+{
+ __serpent_enc_blk_4way(ctx, dst, src, true);
+}
+
+static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src)
+{
+ serpent_dec_blk_4way(ctx, dst, src);
+}
+
+#else
+
+#define SERPENT_PARALLEL_BLOCKS 8
+
+asmlinkage void __serpent_enc_blk_8way(const struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src, bool xor);
+asmlinkage void serpent_dec_blk_8way(const struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src);
+
+static inline void serpent_enc_blk_xway(const void *ctx, u8 *dst, const u8 *src)
+{
+ __serpent_enc_blk_8way(ctx, dst, src, false);
+}
+
+static inline void serpent_enc_blk_xway_xor(const struct serpent_ctx *ctx,
+ u8 *dst, const u8 *src)
+{
+ __serpent_enc_blk_8way(ctx, dst, src, true);
+}
+
+static inline void serpent_dec_blk_xway(const void *ctx, u8 *dst, const u8 *src)
+{
+ serpent_dec_blk_8way(ctx, dst, src);
+}
+
+#endif
+
+#endif
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index f973ace44ad3..ccf0b5fa4933 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -12,9 +12,9 @@
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
#include <crypto/serpent.h>
-#include <crypto/xts.h>
-#include <asm/crypto/glue_helper.h>
-#include <asm/crypto/serpent-avx.h>
+
+#include "serpent-avx.h"
+#include "ecb_cbc_helpers.h"
#define SERPENT_AVX2_PARALLEL_BLOCKS 16
@@ -23,158 +23,44 @@ asmlinkage void serpent_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
asmlinkage void serpent_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
asmlinkage void serpent_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
-asmlinkage void serpent_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
-asmlinkage void serpent_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
-asmlinkage void serpent_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
-
static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
const u8 *key, unsigned int keylen)
{
return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
}
-static const struct common_glue_ctx serpent_enc = {
- .num_funcs = 3,
- .fpu_blocks_limit = 8,
-
- .funcs = { {
- .num_blocks = 16,
- .fn_u = { .ecb = serpent_ecb_enc_16way }
- }, {
- .num_blocks = 8,
- .fn_u = { .ecb = serpent_ecb_enc_8way_avx }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = __serpent_encrypt }
- } }
-};
-
-static const struct common_glue_ctx serpent_ctr = {
- .num_funcs = 3,
- .fpu_blocks_limit = 8,
-
- .funcs = { {
- .num_blocks = 16,
- .fn_u = { .ctr = serpent_ctr_16way }
- }, {
- .num_blocks = 8,
- .fn_u = { .ctr = serpent_ctr_8way_avx }
- }, {
- .num_blocks = 1,
- .fn_u = { .ctr = __serpent_crypt_ctr }
- } }
-};
-
-static const struct common_glue_ctx serpent_enc_xts = {
- .num_funcs = 3,
- .fpu_blocks_limit = 8,
-
- .funcs = { {
- .num_blocks = 16,
- .fn_u = { .xts = serpent_xts_enc_16way }
- }, {
- .num_blocks = 8,
- .fn_u = { .xts = serpent_xts_enc_8way_avx }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = serpent_xts_enc }
- } }
-};
-
-static const struct common_glue_ctx serpent_dec = {
- .num_funcs = 3,
- .fpu_blocks_limit = 8,
-
- .funcs = { {
- .num_blocks = 16,
- .fn_u = { .ecb = serpent_ecb_dec_16way }
- }, {
- .num_blocks = 8,
- .fn_u = { .ecb = serpent_ecb_dec_8way_avx }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = __serpent_decrypt }
- } }
-};
-
-static const struct common_glue_ctx serpent_dec_cbc = {
- .num_funcs = 3,
- .fpu_blocks_limit = 8,
-
- .funcs = { {
- .num_blocks = 16,
- .fn_u = { .cbc = serpent_cbc_dec_16way }
- }, {
- .num_blocks = 8,
- .fn_u = { .cbc = serpent_cbc_dec_8way_avx }
- }, {
- .num_blocks = 1,
- .fn_u = { .cbc = __serpent_decrypt }
- } }
-};
-
-static const struct common_glue_ctx serpent_dec_xts = {
- .num_funcs = 3,
- .fpu_blocks_limit = 8,
-
- .funcs = { {
- .num_blocks = 16,
- .fn_u = { .xts = serpent_xts_dec_16way }
- }, {
- .num_blocks = 8,
- .fn_u = { .xts = serpent_xts_dec_8way_avx }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = serpent_xts_dec }
- } }
-};
-
static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&serpent_enc, req);
+ ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS);
+ ECB_BLOCK(SERPENT_AVX2_PARALLEL_BLOCKS, serpent_ecb_enc_16way);
+ ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_enc_8way_avx);
+ ECB_BLOCK(1, __serpent_encrypt);
+ ECB_WALK_END();
}
static int ecb_decrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&serpent_dec, req);
+ ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS);
+ ECB_BLOCK(SERPENT_AVX2_PARALLEL_BLOCKS, serpent_ecb_dec_16way);
+ ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_dec_8way_avx);
+ ECB_BLOCK(1, __serpent_decrypt);
+ ECB_WALK_END();
}
static int cbc_encrypt(struct skcipher_request *req)
{
- return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req);
+ CBC_WALK_START(req, SERPENT_BLOCK_SIZE, -1);
+ CBC_ENC_BLOCK(__serpent_encrypt);
+ CBC_WALK_END();
}
static int cbc_decrypt(struct skcipher_request *req)
{
- return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
-}
-
-static int ctr_crypt(struct skcipher_request *req)
-{
- return glue_ctr_req_128bit(&serpent_ctr, req);
-}
-
-static int xts_encrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return glue_xts_req_128bit(&serpent_enc_xts, req,
- __serpent_encrypt, &ctx->tweak_ctx,
- &ctx->crypt_ctx, false);
-}
-
-static int xts_decrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return glue_xts_req_128bit(&serpent_dec_xts, req,
- __serpent_encrypt, &ctx->tweak_ctx,
- &ctx->crypt_ctx, true);
+ CBC_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS);
+ CBC_DEC_BLOCK(SERPENT_AVX2_PARALLEL_BLOCKS, serpent_cbc_dec_16way);
+ CBC_DEC_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_cbc_dec_8way_avx);
+ CBC_DEC_BLOCK(1, __serpent_decrypt);
+ CBC_WALK_END();
}
static struct skcipher_alg serpent_algs[] = {
@@ -205,35 +91,6 @@ static struct skcipher_alg serpent_algs[] = {
.setkey = serpent_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
- }, {
- .base.cra_name = "__ctr(serpent)",
- .base.cra_driver_name = "__ctr-serpent-avx2",
- .base.cra_priority = 600,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct serpent_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .chunksize = SERPENT_BLOCK_SIZE,
- .setkey = serpent_setkey_skcipher,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- }, {
- .base.cra_name = "__xts(serpent)",
- .base.cra_driver_name = "__xts-serpent-avx2",
- .base.cra_priority = 600,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
- .base.cra_blocksize = SERPENT_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct serpent_xts_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = 2 * SERPENT_MIN_KEY_SIZE,
- .max_keysize = 2 * SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = xts_serpent_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
},
};
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 7806d1cbe854..6c248e1ea4ef 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -15,9 +15,9 @@
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
#include <crypto/serpent.h>
-#include <crypto/xts.h>
-#include <asm/crypto/glue_helper.h>
-#include <asm/crypto/serpent-avx.h>
+
+#include "serpent-avx.h"
+#include "ecb_cbc_helpers.h"
/* 8-way parallel cipher functions */
asmlinkage void serpent_ecb_enc_8way_avx(const void *ctx, u8 *dst,
@@ -32,191 +32,41 @@ asmlinkage void serpent_cbc_dec_8way_avx(const void *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(serpent_cbc_dec_8way_avx);
-asmlinkage void serpent_ctr_8way_avx(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
-EXPORT_SYMBOL_GPL(serpent_ctr_8way_avx);
-
-asmlinkage void serpent_xts_enc_8way_avx(const void *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-EXPORT_SYMBOL_GPL(serpent_xts_enc_8way_avx);
-
-asmlinkage void serpent_xts_dec_8way_avx(const void *ctx, u8 *dst,
- const u8 *src, le128 *iv);
-EXPORT_SYMBOL_GPL(serpent_xts_dec_8way_avx);
-
-void __serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
-{
- be128 ctrblk;
- u128 *dst = (u128 *)d;
- const u128 *src = (const u128 *)s;
-
- le128_to_be128(&ctrblk, iv);
- le128_inc(iv);
-
- __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
- u128_xor(dst, src, (u128 *)&ctrblk);
-}
-EXPORT_SYMBOL_GPL(__serpent_crypt_ctr);
-
-void serpent_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
-{
- glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_encrypt);
-}
-EXPORT_SYMBOL_GPL(serpent_xts_enc);
-
-void serpent_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
-{
- glue_xts_crypt_128bit_one(ctx, dst, src, iv, __serpent_decrypt);
-}
-EXPORT_SYMBOL_GPL(serpent_xts_dec);
-
static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
const u8 *key, unsigned int keylen)
{
return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
}
-int xts_serpent_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- int err;
-
- err = xts_verify_key(tfm, key, keylen);
- if (err)
- return err;
-
- /* first half of xts-key is for crypt */
- err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
- if (err)
- return err;
-
- /* second half of xts-key is for tweak */
- return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
-}
-EXPORT_SYMBOL_GPL(xts_serpent_setkey);
-
-static const struct common_glue_ctx serpent_enc = {
- .num_funcs = 2,
- .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = SERPENT_PARALLEL_BLOCKS,
- .fn_u = { .ecb = serpent_ecb_enc_8way_avx }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = __serpent_encrypt }
- } }
-};
-
-static const struct common_glue_ctx serpent_ctr = {
- .num_funcs = 2,
- .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = SERPENT_PARALLEL_BLOCKS,
- .fn_u = { .ctr = serpent_ctr_8way_avx }
- }, {
- .num_blocks = 1,
- .fn_u = { .ctr = __serpent_crypt_ctr }
- } }
-};
-
-static const struct common_glue_ctx serpent_enc_xts = {
- .num_funcs = 2,
- .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = SERPENT_PARALLEL_BLOCKS,
- .fn_u = { .xts = serpent_xts_enc_8way_avx }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = serpent_xts_enc }
- } }
-};
-
-static const struct common_glue_ctx serpent_dec = {
- .num_funcs = 2,
- .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = SERPENT_PARALLEL_BLOCKS,
- .fn_u = { .ecb = serpent_ecb_dec_8way_avx }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = __serpent_decrypt }
- } }
-};
-
-static const struct common_glue_ctx serpent_dec_cbc = {
- .num_funcs = 2,
- .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = SERPENT_PARALLEL_BLOCKS,
- .fn_u = { .cbc = serpent_cbc_dec_8way_avx }
- }, {
- .num_blocks = 1,
- .fn_u = { .cbc = __serpent_decrypt }
- } }
-};
-
-static const struct common_glue_ctx serpent_dec_xts = {
- .num_funcs = 2,
- .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = SERPENT_PARALLEL_BLOCKS,
- .fn_u = { .xts = serpent_xts_dec_8way_avx }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = serpent_xts_dec }
- } }
-};
-
static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&serpent_enc, req);
+ ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS);
+ ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_enc_8way_avx);
+ ECB_BLOCK(1, __serpent_encrypt);
+ ECB_WALK_END();
}
static int ecb_decrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&serpent_dec, req);
+ ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS);
+ ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_ecb_dec_8way_avx);
+ ECB_BLOCK(1, __serpent_decrypt);
+ ECB_WALK_END();
}
static int cbc_encrypt(struct skcipher_request *req)
{
- return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req);
+ CBC_WALK_START(req, SERPENT_BLOCK_SIZE, -1);
+ CBC_ENC_BLOCK(__serpent_encrypt);
+ CBC_WALK_END();
}
static int cbc_decrypt(struct skcipher_request *req)
{
- return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
-}
-
-static int ctr_crypt(struct skcipher_request *req)
-{
- return glue_ctr_req_128bit(&serpent_ctr, req);
-}
-
-static int xts_encrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return glue_xts_req_128bit(&serpent_enc_xts, req,
- __serpent_encrypt, &ctx->tweak_ctx,
- &ctx->crypt_ctx, false);
-}
-
-static int xts_decrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return glue_xts_req_128bit(&serpent_dec_xts, req,
- __serpent_encrypt, &ctx->tweak_ctx,
- &ctx->crypt_ctx, true);
+ CBC_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS);
+ CBC_DEC_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_cbc_dec_8way_avx);
+ CBC_DEC_BLOCK(1, __serpent_decrypt);
+ CBC_WALK_END();
}
static struct skcipher_alg serpent_algs[] = {
@@ -247,35 +97,6 @@ static struct skcipher_alg serpent_algs[] = {
.setkey = serpent_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
- }, {
- .base.cra_name = "__ctr(serpent)",
- .base.cra_driver_name = "__ctr-serpent-avx",
- .base.cra_priority = 500,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct serpent_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .chunksize = SERPENT_BLOCK_SIZE,
- .setkey = serpent_setkey_skcipher,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- }, {
- .base.cra_name = "__xts(serpent)",
- .base.cra_driver_name = "__xts-serpent-avx",
- .base.cra_priority = 500,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
- .base.cra_blocksize = SERPENT_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct serpent_xts_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = 2 * SERPENT_MIN_KEY_SIZE,
- .max_keysize = 2 * SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .setkey = xts_serpent_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
},
};
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 4fed8d26b91a..d78f37e9b2cf 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -10,8 +10,6 @@
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
- * CTR part based on code (crypto/ctr.c) by:
- * (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*/
#include <linux/module.h>
@@ -22,8 +20,9 @@
#include <crypto/b128ops.h>
#include <crypto/internal/simd.h>
#include <crypto/serpent.h>
-#include <asm/crypto/serpent-sse2.h>
-#include <asm/crypto/glue_helper.h>
+
+#include "serpent-sse2.h"
+#include "ecb_cbc_helpers.h"
static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
const u8 *key, unsigned int keylen)
@@ -31,130 +30,46 @@ static int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
}
-static void serpent_decrypt_cbc_xway(const void *ctx, u8 *d, const u8 *s)
-{
- u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
- u128 *dst = (u128 *)d;
- const u128 *src = (const u128 *)s;
- unsigned int j;
-
- for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
- ivs[j] = src[j];
-
- serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
-
- for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
- u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
-}
-
-static void serpent_crypt_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
-{
- be128 ctrblk;
- u128 *dst = (u128 *)d;
- const u128 *src = (const u128 *)s;
-
- le128_to_be128(&ctrblk, iv);
- le128_inc(iv);
-
- __serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
- u128_xor(dst, src, (u128 *)&ctrblk);
-}
-
-static void serpent_crypt_ctr_xway(const void *ctx, u8 *d, const u8 *s,
- le128 *iv)
+static void serpent_decrypt_cbc_xway(const void *ctx, u8 *dst, const u8 *src)
{
- be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
- u128 *dst = (u128 *)d;
- const u128 *src = (const u128 *)s;
- unsigned int i;
-
- for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
- if (dst != src)
- dst[i] = src[i];
-
- le128_to_be128(&ctrblks[i], iv);
- le128_inc(iv);
- }
+ u8 buf[SERPENT_PARALLEL_BLOCKS - 1][SERPENT_BLOCK_SIZE];
+ const u8 *s = src;
- serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
+ if (dst == src)
+ s = memcpy(buf, src, sizeof(buf));
+ serpent_dec_blk_xway(ctx, dst, src);
+ crypto_xor(dst + SERPENT_BLOCK_SIZE, s, sizeof(buf));
}
-static const struct common_glue_ctx serpent_enc = {
- .num_funcs = 2,
- .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = SERPENT_PARALLEL_BLOCKS,
- .fn_u = { .ecb = serpent_enc_blk_xway }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = __serpent_encrypt }
- } }
-};
-
-static const struct common_glue_ctx serpent_ctr = {
- .num_funcs = 2,
- .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = SERPENT_PARALLEL_BLOCKS,
- .fn_u = { .ctr = serpent_crypt_ctr_xway }
- }, {
- .num_blocks = 1,
- .fn_u = { .ctr = serpent_crypt_ctr }
- } }
-};
-
-static const struct common_glue_ctx serpent_dec = {
- .num_funcs = 2,
- .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = SERPENT_PARALLEL_BLOCKS,
- .fn_u = { .ecb = serpent_dec_blk_xway }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = __serpent_decrypt }
- } }
-};
-
-static const struct common_glue_ctx serpent_dec_cbc = {
- .num_funcs = 2,
- .fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = SERPENT_PARALLEL_BLOCKS,
- .fn_u = { .cbc = serpent_decrypt_cbc_xway }
- }, {
- .num_blocks = 1,
- .fn_u = { .cbc = __serpent_decrypt }
- } }
-};
-
static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&serpent_enc, req);
+ ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS);
+ ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_enc_blk_xway);
+ ECB_BLOCK(1, __serpent_encrypt);
+ ECB_WALK_END();
}
static int ecb_decrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&serpent_dec, req);
+ ECB_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS);
+ ECB_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_dec_blk_xway);
+ ECB_BLOCK(1, __serpent_decrypt);
+ ECB_WALK_END();
}
static int cbc_encrypt(struct skcipher_request *req)
{
- return glue_cbc_encrypt_req_128bit(__serpent_encrypt,
- req);
+ CBC_WALK_START(req, SERPENT_BLOCK_SIZE, -1);
+ CBC_ENC_BLOCK(__serpent_encrypt);
+ CBC_WALK_END();
}
static int cbc_decrypt(struct skcipher_request *req)
{
- return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
-}
-
-static int ctr_crypt(struct skcipher_request *req)
-{
- return glue_ctr_req_128bit(&serpent_ctr, req);
+ CBC_WALK_START(req, SERPENT_BLOCK_SIZE, SERPENT_PARALLEL_BLOCKS);
+ CBC_DEC_BLOCK(SERPENT_PARALLEL_BLOCKS, serpent_decrypt_cbc_xway);
+ CBC_DEC_BLOCK(1, __serpent_decrypt);
+ CBC_WALK_END();
}
static struct skcipher_alg serpent_algs[] = {
@@ -185,21 +100,6 @@ static struct skcipher_alg serpent_algs[] = {
.setkey = serpent_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
- }, {
- .base.cra_name = "__ctr(serpent)",
- .base.cra_driver_name = "__ctr-serpent-sse2",
- .base.cra_priority = 400,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct serpent_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = SERPENT_MIN_KEY_SIZE,
- .max_keysize = SERPENT_MAX_KEY_SIZE,
- .ivsize = SERPENT_BLOCK_SIZE,
- .chunksize = SERPENT_BLOCK_SIZE,
- .setkey = serpent_setkey_skcipher,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
},
};
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index a5151393bb2f..37e63b3c664e 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -19,11 +19,6 @@
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
-.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16
-.align 16
-.Lxts_gf128mul_and_shl1_mask:
- .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
-
.text
/* structure of crypto context */
@@ -379,78 +374,3 @@ SYM_FUNC_START(twofish_cbc_dec_8way)
FRAME_END
ret;
SYM_FUNC_END(twofish_cbc_dec_8way)
-
-SYM_FUNC_START(twofish_ctr_8way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: iv (little endian, 128bit)
- */
- FRAME_BEGIN
-
- pushq %r12;
-
- movq %rsi, %r11;
- movq %rdx, %r12;
-
- load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
- RD2, RX0, RX1, RY0);
-
- call __twofish_enc_blk8;
-
- store_ctr_8way(%r12, %r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
-
- popq %r12;
-
- FRAME_END
- ret;
-SYM_FUNC_END(twofish_ctr_8way)
-
-SYM_FUNC_START(twofish_xts_enc_8way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
- FRAME_BEGIN
-
- movq %rsi, %r11;
-
- /* regs <= src, dst <= IVs, regs <= regs xor IVs */
- load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
- RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask);
-
- call __twofish_enc_blk8;
-
- /* dst <= regs xor IVs(in dst) */
- store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
-
- FRAME_END
- ret;
-SYM_FUNC_END(twofish_xts_enc_8way)
-
-SYM_FUNC_START(twofish_xts_dec_8way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
- */
- FRAME_BEGIN
-
- movq %rsi, %r11;
-
- /* regs <= src, dst <= IVs, regs <= regs xor IVs */
- load_xts_8way(%rcx, %rdx, %rsi, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2,
- RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask);
-
- call __twofish_dec_blk8;
-
- /* dst <= regs xor IVs(in dst) */
- store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
-
- FRAME_END
- ret;
-SYM_FUNC_END(twofish_xts_dec_8way)
diff --git a/arch/x86/crypto/twofish.h b/arch/x86/crypto/twofish.h
new file mode 100644
index 000000000000..12df400e6d53
--- /dev/null
+++ b/arch/x86/crypto/twofish.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_X86_TWOFISH_H
+#define ASM_X86_TWOFISH_H
+
+#include <linux/crypto.h>
+#include <crypto/twofish.h>
+#include <crypto/b128ops.h>
+
+/* regular block cipher functions from twofish_x86_64 module */
+asmlinkage void twofish_enc_blk(const void *ctx, u8 *dst, const u8 *src);
+asmlinkage void twofish_dec_blk(const void *ctx, u8 *dst, const u8 *src);
+
+/* 3-way parallel cipher functions */
+asmlinkage void __twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src,
+ bool xor);
+asmlinkage void twofish_dec_blk_3way(const void *ctx, u8 *dst, const u8 *src);
+
+/* helpers from twofish_x86_64-3way module */
+extern void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src);
+
+#endif /* ASM_X86_TWOFISH_H */
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 2dbc8ce3730e..3eb3440b477a 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -15,9 +15,9 @@
#include <crypto/algapi.h>
#include <crypto/internal/simd.h>
#include <crypto/twofish.h>
-#include <crypto/xts.h>
-#include <asm/crypto/glue_helper.h>
-#include <asm/crypto/twofish.h>
+
+#include "twofish.h"
+#include "ecb_cbc_helpers.h"
#define TWOFISH_PARALLEL_BLOCKS 8
@@ -26,13 +26,6 @@ asmlinkage void twofish_ecb_enc_8way(const void *ctx, u8 *dst, const u8 *src);
asmlinkage void twofish_ecb_dec_8way(const void *ctx, u8 *dst, const u8 *src);
asmlinkage void twofish_cbc_dec_8way(const void *ctx, u8 *dst, const u8 *src);
-asmlinkage void twofish_ctr_8way(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
-
-asmlinkage void twofish_xts_enc_8way(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
-asmlinkage void twofish_xts_dec_8way(const void *ctx, u8 *dst, const u8 *src,
- le128 *iv);
static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
const u8 *key, unsigned int keylen)
@@ -45,171 +38,38 @@ static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src)
__twofish_enc_blk_3way(ctx, dst, src, false);
}
-static void twofish_xts_enc(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
-{
- glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_enc_blk);
-}
-
-static void twofish_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv)
-{
- glue_xts_crypt_128bit_one(ctx, dst, src, iv, twofish_dec_blk);
-}
-
-struct twofish_xts_ctx {
- struct twofish_ctx tweak_ctx;
- struct twofish_ctx crypt_ctx;
-};
-
-static int xts_twofish_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int keylen)
-{
- struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
- int err;
-
- err = xts_verify_key(tfm, key, keylen);
- if (err)
- return err;
-
- /* first half of xts-key is for crypt */
- err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2);
- if (err)
- return err;
-
- /* second half of xts-key is for tweak */
- return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
-}
-
-static const struct common_glue_ctx twofish_enc = {
- .num_funcs = 3,
- .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = TWOFISH_PARALLEL_BLOCKS,
- .fn_u = { .ecb = twofish_ecb_enc_8way }
- }, {
- .num_blocks = 3,
- .fn_u = { .ecb = twofish_enc_blk_3way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = twofish_enc_blk }
- } }
-};
-
-static const struct common_glue_ctx twofish_ctr = {
- .num_funcs = 3,
- .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = TWOFISH_PARALLEL_BLOCKS,
- .fn_u = { .ctr = twofish_ctr_8way }
- }, {
- .num_blocks = 3,
- .fn_u = { .ctr = twofish_enc_blk_ctr_3way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ctr = twofish_enc_blk_ctr }
- } }
-};
-
-static const struct common_glue_ctx twofish_enc_xts = {
- .num_funcs = 2,
- .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = TWOFISH_PARALLEL_BLOCKS,
- .fn_u = { .xts = twofish_xts_enc_8way }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = twofish_xts_enc }
- } }
-};
-
-static const struct common_glue_ctx twofish_dec = {
- .num_funcs = 3,
- .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = TWOFISH_PARALLEL_BLOCKS,
- .fn_u = { .ecb = twofish_ecb_dec_8way }
- }, {
- .num_blocks = 3,
- .fn_u = { .ecb = twofish_dec_blk_3way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = twofish_dec_blk }
- } }
-};
-
-static const struct common_glue_ctx twofish_dec_cbc = {
- .num_funcs = 3,
- .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = TWOFISH_PARALLEL_BLOCKS,
- .fn_u = { .cbc = twofish_cbc_dec_8way }
- }, {
- .num_blocks = 3,
- .fn_u = { .cbc = twofish_dec_blk_cbc_3way }
- }, {
- .num_blocks = 1,
- .fn_u = { .cbc = twofish_dec_blk }
- } }
-};
-
-static const struct common_glue_ctx twofish_dec_xts = {
- .num_funcs = 2,
- .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
-
- .funcs = { {
- .num_blocks = TWOFISH_PARALLEL_BLOCKS,
- .fn_u = { .xts = twofish_xts_dec_8way }
- }, {
- .num_blocks = 1,
- .fn_u = { .xts = twofish_xts_dec }
- } }
-};
-
static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&twofish_enc, req);
+ ECB_WALK_START(req, TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS);
+ ECB_BLOCK(TWOFISH_PARALLEL_BLOCKS, twofish_ecb_enc_8way);
+ ECB_BLOCK(3, twofish_enc_blk_3way);
+ ECB_BLOCK(1, twofish_enc_blk);
+ ECB_WALK_END();
}
static int ecb_decrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&twofish_dec, req);
+ ECB_WALK_START(req, TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS);
+ ECB_BLOCK(TWOFISH_PARALLEL_BLOCKS, twofish_ecb_dec_8way);
+ ECB_BLOCK(3, twofish_dec_blk_3way);
+ ECB_BLOCK(1, twofish_dec_blk);
+ ECB_WALK_END();
}
static int cbc_encrypt(struct skcipher_request *req)
{
- return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req);
+ CBC_WALK_START(req, TF_BLOCK_SIZE, -1);
+ CBC_ENC_BLOCK(twofish_enc_blk);
+ CBC_WALK_END();
}
static int cbc_decrypt(struct skcipher_request *req)
{
- return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
-}
-
-static int ctr_crypt(struct skcipher_request *req)
-{
- return glue_ctr_req_128bit(&twofish_ctr, req);
-}
-
-static int xts_encrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return glue_xts_req_128bit(&twofish_enc_xts, req, twofish_enc_blk,
- &ctx->tweak_ctx, &ctx->crypt_ctx, false);
-}
-
-static int xts_decrypt(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct twofish_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- return glue_xts_req_128bit(&twofish_dec_xts, req, twofish_enc_blk,
- &ctx->tweak_ctx, &ctx->crypt_ctx, true);
+ CBC_WALK_START(req, TF_BLOCK_SIZE, TWOFISH_PARALLEL_BLOCKS);
+ CBC_DEC_BLOCK(TWOFISH_PARALLEL_BLOCKS, twofish_cbc_dec_8way);
+ CBC_DEC_BLOCK(3, twofish_dec_blk_cbc_3way);
+ CBC_DEC_BLOCK(1, twofish_dec_blk);
+ CBC_WALK_END();
}
static struct skcipher_alg twofish_algs[] = {
@@ -240,35 +100,6 @@ static struct skcipher_alg twofish_algs[] = {
.setkey = twofish_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
- }, {
- .base.cra_name = "__ctr(twofish)",
- .base.cra_driver_name = "__ctr-twofish-avx",
- .base.cra_priority = 400,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct twofish_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .chunksize = TF_BLOCK_SIZE,
- .setkey = twofish_setkey_skcipher,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- }, {
- .base.cra_name = "__xts(twofish)",
- .base.cra_driver_name = "__xts-twofish-avx",
- .base.cra_priority = 400,
- .base.cra_flags = CRYPTO_ALG_INTERNAL,
- .base.cra_blocksize = TF_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct twofish_xts_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = 2 * TF_MIN_KEY_SIZE,
- .max_keysize = 2 * TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .setkey = xts_twofish_setkey,
- .encrypt = xts_encrypt,
- .decrypt = xts_decrypt,
},
};
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 768af6075479..03725696397c 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -5,17 +5,16 @@
* Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*/
-#include <asm/crypto/glue_helper.h>
-#include <asm/crypto/twofish.h>
#include <crypto/algapi.h>
-#include <crypto/b128ops.h>
-#include <crypto/internal/skcipher.h>
#include <crypto/twofish.h>
#include <linux/crypto.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/types.h>
+#include "twofish.h"
+#include "ecb_cbc_helpers.h"
+
EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
@@ -30,143 +29,48 @@ static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src)
__twofish_enc_blk_3way(ctx, dst, src, false);
}
-static inline void twofish_enc_blk_xor_3way(const void *ctx, u8 *dst,
- const u8 *src)
-{
- __twofish_enc_blk_3way(ctx, dst, src, true);
-}
-
-void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s)
+void twofish_dec_blk_cbc_3way(const void *ctx, u8 *dst, const u8 *src)
{
- u128 ivs[2];
- u128 *dst = (u128 *)d;
- const u128 *src = (const u128 *)s;
-
- ivs[0] = src[0];
- ivs[1] = src[1];
+ u8 buf[2][TF_BLOCK_SIZE];
+ const u8 *s = src;
- twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
+ if (dst == src)
+ s = memcpy(buf, src, sizeof(buf));
+ twofish_dec_blk_3way(ctx, dst, src);
+ crypto_xor(dst + TF_BLOCK_SIZE, s, sizeof(buf));
- u128_xor(&dst[1], &dst[1], &ivs[0]);
- u128_xor(&dst[2], &dst[2], &ivs[1]);
}
EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
-void twofish_enc_blk_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
-{
- be128 ctrblk;
- u128 *dst = (u128 *)d;
- const u128 *src = (const u128 *)s;
-
- if (dst != src)
- *dst = *src;
-
- le128_to_be128(&ctrblk, iv);
- le128_inc(iv);
-
- twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
- u128_xor(dst, dst, (u128 *)&ctrblk);
-}
-EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
-
-void twofish_enc_blk_ctr_3way(const void *ctx, u8 *d, const u8 *s, le128 *iv)
-{
- be128 ctrblks[3];
- u128 *dst = (u128 *)d;
- const u128 *src = (const u128 *)s;
-
- if (dst != src) {
- dst[0] = src[0];
- dst[1] = src[1];
- dst[2] = src[2];
- }
-
- le128_to_be128(&ctrblks[0], iv);
- le128_inc(iv);
- le128_to_be128(&ctrblks[1], iv);
- le128_inc(iv);
- le128_to_be128(&ctrblks[2], iv);
- le128_inc(iv);
-
- twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
-}
-EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way);
-
-static const struct common_glue_ctx twofish_enc = {
- .num_funcs = 2,
- .fpu_blocks_limit = -1,
-
- .funcs = { {
- .num_blocks = 3,
- .fn_u = { .ecb = twofish_enc_blk_3way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = twofish_enc_blk }
- } }
-};
-
-static const struct common_glue_ctx twofish_ctr = {
- .num_funcs = 2,
- .fpu_blocks_limit = -1,
-
- .funcs = { {
- .num_blocks = 3,
- .fn_u = { .ctr = twofish_enc_blk_ctr_3way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ctr = twofish_enc_blk_ctr }
- } }
-};
-
-static const struct common_glue_ctx twofish_dec = {
- .num_funcs = 2,
- .fpu_blocks_limit = -1,
-
- .funcs = { {
- .num_blocks = 3,
- .fn_u = { .ecb = twofish_dec_blk_3way }
- }, {
- .num_blocks = 1,
- .fn_u = { .ecb = twofish_dec_blk }
- } }
-};
-
-static const struct common_glue_ctx twofish_dec_cbc = {
- .num_funcs = 2,
- .fpu_blocks_limit = -1,
-
- .funcs = { {
- .num_blocks = 3,
- .fn_u = { .cbc = twofish_dec_blk_cbc_3way }
- }, {
- .num_blocks = 1,
- .fn_u = { .cbc = twofish_dec_blk }
- } }
-};
-
static int ecb_encrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&twofish_enc, req);
+ ECB_WALK_START(req, TF_BLOCK_SIZE, -1);
+ ECB_BLOCK(3, twofish_enc_blk_3way);
+ ECB_BLOCK(1, twofish_enc_blk);
+ ECB_WALK_END();
}
static int ecb_decrypt(struct skcipher_request *req)
{
- return glue_ecb_req_128bit(&twofish_dec, req);
+ ECB_WALK_START(req, TF_BLOCK_SIZE, -1);
+ ECB_BLOCK(3, twofish_dec_blk_3way);
+ ECB_BLOCK(1, twofish_dec_blk);
+ ECB_WALK_END();
}
static int cbc_encrypt(struct skcipher_request *req)
{
- return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req);
+ CBC_WALK_START(req, TF_BLOCK_SIZE, -1);
+ CBC_ENC_BLOCK(twofish_enc_blk);
+ CBC_WALK_END();
}
static int cbc_decrypt(struct skcipher_request *req)
{
- return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
-}
-
-static int ctr_crypt(struct skcipher_request *req)
-{
- return glue_ctr_req_128bit(&twofish_ctr, req);
+ CBC_WALK_START(req, TF_BLOCK_SIZE, -1);
+ CBC_DEC_BLOCK(3, twofish_dec_blk_cbc_3way);
+ CBC_DEC_BLOCK(1, twofish_dec_blk);
+ CBC_WALK_END();
}
static struct skcipher_alg tf_skciphers[] = {
@@ -195,20 +99,6 @@ static struct skcipher_alg tf_skciphers[] = {
.setkey = twofish_setkey_skcipher,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
- }, {
- .base.cra_name = "ctr(twofish)",
- .base.cra_driver_name = "ctr-twofish-3way",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct twofish_ctx),
- .base.cra_module = THIS_MODULE,
- .min_keysize = TF_MIN_KEY_SIZE,
- .max_keysize = TF_MAX_KEY_SIZE,
- .ivsize = TF_BLOCK_SIZE,
- .chunksize = TF_BLOCK_SIZE,
- .setkey = twofish_setkey_skcipher,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
},
};