From 78c37d191dd6899d8c219fee597a17d6e3c5d288 Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Thu, 10 Jan 2013 18:54:59 +0400 Subject: crypto: crc32 - add crc32 pclmulqdq implementation and wrappers for table implementation This patch adds crc32 algorithms to shash crypto api. One is wrapper to gerneric crc32_le function. Second is crc32 pclmulqdq implementation. It use hardware provided PCLMULQDQ instruction to accelerate the CRC32 disposal. This instruction present from Intel Westmere and AMD Bulldozer CPUs. For intel core i5 I got 450MB/s for table implementation and 2100MB/s for pclmulqdq implementation. Signed-off-by: Alexander Boyko Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 + arch/x86/crypto/crc32-pclmul_asm.S | 247 ++++++++++++++++++++++++++++++++++++ arch/x86/crypto/crc32-pclmul_glue.c | 201 +++++++++++++++++++++++++++++ 3 files changed, 450 insertions(+) create mode 100644 arch/x86/crypto/crc32-pclmul_asm.S create mode 100644 arch/x86/crypto/crc32-pclmul_glue.c (limited to 'arch') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index e0ca7c9ac383..63947a8f9f0f 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o +obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o aes-i586-y := aes-i586-asm_32.o aes_glue.o twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o @@ -52,3 +53,4 @@ ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o crc32c-intel-y := crc32c-intel_glue.o crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o +crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S new file mode 100644 index 000000000000..65ea6a624907 --- /dev/null +++ b/arch/x86/crypto/crc32-pclmul_asm.S @@ -0,0 +1,247 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32 + * calculation. + * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE) + * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found + * at: + * http://www.intel.com/products/processor/manuals/ + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 2B: Instruction Set Reference, N-Z + * + * Authors: Gregory Prestas + * Alexander Boyko + */ + +#include +#include + + +.align 16 +/* + * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4 + * #define CONSTANT_R1 0x154442bd4LL + * + * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596 + * #define CONSTANT_R2 0x1c6e41596LL + */ +.Lconstant_R2R1: + .octa 0x00000001c6e415960000000154442bd4 +/* + * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0 + * #define CONSTANT_R3 0x1751997d0LL + * + * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e + * #define CONSTANT_R4 0x0ccaa009eLL + */ +.Lconstant_R4R3: + .octa 0x00000000ccaa009e00000001751997d0 +/* + * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124 + * #define CONSTANT_R5 0x163cd6124LL + */ +.Lconstant_R5: + .octa 0x00000000000000000000000163cd6124 +.Lconstant_mask32: + .octa 0x000000000000000000000000FFFFFFFF +/* + * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL + * + * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL + * #define CONSTANT_RU 0x1F7011641LL + */ +.Lconstant_RUpoly: + .octa 0x00000001F701164100000001DB710641 + +#define CONSTANT %xmm0 + +#ifdef __x86_64__ +#define BUF %rdi +#define LEN %rsi +#define CRC %edx +#else +#warning Using 32bit code support +#define BUF %eax +#define LEN %edx +#define CRC %ecx +#endif + + + +.text +/** + * Calculate crc32 + * BUF - buffer (16 bytes aligned) + * LEN - sizeof buffer (16 bytes aligned), LEN should be grater than 63 + * CRC - initial crc32 + * return %eax crc32 + * uint crc32_pclmul_le_16(unsigned char const *buffer, + * size_t len, uint crc32) + */ +.globl crc32_pclmul_le_16 +.align 4, 0x90 +crc32_pclmul_le_16:/* buffer and buffer size are 16 bytes aligned */ + movdqa (BUF), %xmm1 + movdqa 0x10(BUF), %xmm2 + movdqa 0x20(BUF), %xmm3 + movdqa 0x30(BUF), %xmm4 + movd CRC, CONSTANT + pxor CONSTANT, %xmm1 + sub $0x40, LEN + add $0x40, BUF +#ifndef __x86_64__ + /* This is for position independent code(-fPIC) support for 32bit */ + call delta +delta: + pop %ecx +#endif + cmp $0x40, LEN + jb less_64 + +#ifdef __x86_64__ + movdqa .Lconstant_R2R1(%rip), CONSTANT +#else + movdqa .Lconstant_R2R1 - delta(%ecx), CONSTANT +#endif + +loop_64:/* 64 bytes Full cache line folding */ + prefetchnta 0x40(BUF) + movdqa %xmm1, %xmm5 + movdqa %xmm2, %xmm6 + movdqa %xmm3, %xmm7 +#ifdef __x86_64__ + movdqa %xmm4, %xmm8 +#endif + PCLMULQDQ 00, CONSTANT, %xmm1 + PCLMULQDQ 00, CONSTANT, %xmm2 + PCLMULQDQ 00, CONSTANT, %xmm3 +#ifdef __x86_64__ + PCLMULQDQ 00, CONSTANT, %xmm4 +#endif + PCLMULQDQ 0x11, CONSTANT, %xmm5 + PCLMULQDQ 0x11, CONSTANT, %xmm6 + PCLMULQDQ 0x11, CONSTANT, %xmm7 +#ifdef __x86_64__ + PCLMULQDQ 0x11, CONSTANT, %xmm8 +#endif + pxor %xmm5, %xmm1 + pxor %xmm6, %xmm2 + pxor %xmm7, %xmm3 +#ifdef __x86_64__ + pxor %xmm8, %xmm4 +#else + /* xmm8 unsupported for x32 */ + movdqa %xmm4, %xmm5 + PCLMULQDQ 00, CONSTANT, %xmm4 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm4 +#endif + + pxor (BUF), %xmm1 + pxor 0x10(BUF), %xmm2 + pxor 0x20(BUF), %xmm3 + pxor 0x30(BUF), %xmm4 + + sub $0x40, LEN + add $0x40, BUF + cmp $0x40, LEN + jge loop_64 +less_64:/* Folding cache line into 128bit */ +#ifdef __x86_64__ + movdqa .Lconstant_R4R3(%rip), CONSTANT +#else + movdqa .Lconstant_R4R3 - delta(%ecx), CONSTANT +#endif + prefetchnta (BUF) + + movdqa %xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor %xmm2, %xmm1 + + movdqa %xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor %xmm3, %xmm1 + + movdqa %xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor %xmm4, %xmm1 + + cmp $0x10, LEN + jb fold_64 +loop_16:/* Folding rest buffer into 128bit */ + movdqa %xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor (BUF), %xmm1 + sub $0x10, LEN + add $0x10, BUF + cmp $0x10, LEN + jge loop_16 + +fold_64: + /* perform the last 64 bit fold, also adds 32 zeroes + * to the input stream */ + PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */ + psrldq $0x08, %xmm1 + pxor CONSTANT, %xmm1 + + /* final 32-bit fold */ + movdqa %xmm1, %xmm2 +#ifdef __x86_64__ + movdqa .Lconstant_R5(%rip), CONSTANT + movdqa .Lconstant_mask32(%rip), %xmm3 +#else + movdqa .Lconstant_R5 - delta(%ecx), CONSTANT + movdqa .Lconstant_mask32 - delta(%ecx), %xmm3 +#endif + psrldq $0x04, %xmm2 + pand %xmm3, %xmm1 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + pxor %xmm2, %xmm1 + + /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ +#ifdef __x86_64__ + movdqa .Lconstant_RUpoly(%rip), CONSTANT +#else + movdqa .Lconstant_RUpoly - delta(%ecx), CONSTANT +#endif + movdqa %xmm1, %xmm2 + pand %xmm3, %xmm1 + PCLMULQDQ 0x10, CONSTANT, %xmm1 + pand %xmm3, %xmm1 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + pxor %xmm2, %xmm1 + pextrd $0x01, %xmm1, %eax + + ret diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c new file mode 100644 index 000000000000..9d014a74ef96 --- /dev/null +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -0,0 +1,201 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + * Wrappers for kernel crypto shash api to pclmulqdq crc32 imlementation. + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +#define PCLMUL_MIN_LEN 64L /* minimum size of buffer + * for crc32_pclmul_le_16 */ +#define SCALE_F 16L /* size of xmm register */ +#define SCALE_F_MASK (SCALE_F - 1) + +u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32); + +static u32 __attribute__((pure)) + crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len) +{ + unsigned int iquotient; + unsigned int iremainder; + unsigned int prealign; + + if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable()) + return crc32_le(crc, p, len); + + if ((long)p & SCALE_F_MASK) { + /* align p to 16 byte */ + prealign = SCALE_F - ((long)p & SCALE_F_MASK); + + crc = crc32_le(crc, p, prealign); + len -= prealign; + p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) & + ~SCALE_F_MASK); + } + iquotient = len & (~SCALE_F_MASK); + iremainder = len & SCALE_F_MASK; + + kernel_fpu_begin(); + crc = crc32_pclmul_le_16(p, iquotient, crc); + kernel_fpu_end(); + + if (iremainder) + crc = crc32_le(crc, p + iquotient, iremainder); + + return crc; +} + +static int crc32_pclmul_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = 0; + + return 0; +} + +static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) { + crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + *mctx = le32_to_cpup((__le32 *)key); + return 0; +} + +static int crc32_pclmul_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *crcp = shash_desc_ctx(desc); + + *crcp = *mctx; + + return 0; +} + +static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *crcp = shash_desc_ctx(desc); + + *crcp = crc32_pclmul_le(*crcp, data, len); + return 0; +} + +/* No final XOR 0xFFFFFFFF, like crc32_le */ +static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + *(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len)); + return 0; +} + +static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out); +} + +static int crc32_pclmul_final(struct shash_desc *desc, u8 *out) +{ + u32 *crcp = shash_desc_ctx(desc); + + *(__le32 *)out = cpu_to_le32p(crcp); + return 0; +} + +static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len, + out); +} + +static struct shash_alg alg = { + .setkey = crc32_pclmul_setkey, + .init = crc32_pclmul_init, + .update = crc32_pclmul_update, + .final = crc32_pclmul_final, + .finup = crc32_pclmul_finup, + .digest = crc32_pclmul_digest, + .descsize = sizeof(u32), + .digestsize = CHKSUM_DIGEST_SIZE, + .base = { + .cra_name = "crc32", + .cra_driver_name = "crc32-pclmul", + .cra_priority = 200, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_ctxsize = sizeof(u32), + .cra_module = THIS_MODULE, + .cra_init = crc32_pclmul_cra_init, + } +}; + +static const struct x86_cpu_id crc32pclmul_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id); + + +static int __init crc32_pclmul_mod_init(void) +{ + + if (!x86_match_cpu(crc32pclmul_cpu_id)) { + pr_info("PCLMULQDQ-NI instructions are not detected.\n"); + return -ENODEV; + } + return crypto_register_shash(&alg); +} + +static void __exit crc32_pclmul_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crc32_pclmul_mod_init); +module_exit(crc32_pclmul_mod_fini); + +MODULE_AUTHOR("Alexander Boyko "); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS("crc32"); +MODULE_ALIAS("crc32-pclmul"); -- cgit v1.2.3-59-g8ed1b From 3f299743839ae0a4c183035c36aa1e2807e53fe4 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:38:50 +0200 Subject: crypto: x86/aes - assembler clean-ups: use ENTRY/ENDPROC, localize jump targets Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-i586-asm_32.S | 15 +++++---------- arch/x86/crypto/aes-x86_64-asm_64.S | 30 +++++++++++++++--------------- 2 files changed, 20 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S index b949ec2f9af4..2849dbc59e11 100644 --- a/arch/x86/crypto/aes-i586-asm_32.S +++ b/arch/x86/crypto/aes-i586-asm_32.S @@ -36,6 +36,7 @@ .file "aes-i586-asm.S" .text +#include #include #define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) @@ -219,14 +220,10 @@ // AES (Rijndael) Encryption Subroutine /* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ -.global aes_enc_blk - .extern crypto_ft_tab .extern crypto_fl_tab -.align 4 - -aes_enc_blk: +ENTRY(aes_enc_blk) push %ebp mov ctx(%esp),%ebp @@ -290,18 +287,15 @@ aes_enc_blk: mov %r0,(%ebp) pop %ebp ret +ENDPROC(aes_enc_blk) // AES (Rijndael) Decryption Subroutine /* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ -.global aes_dec_blk - .extern crypto_it_tab .extern crypto_il_tab -.align 4 - -aes_dec_blk: +ENTRY(aes_dec_blk) push %ebp mov ctx(%esp),%ebp @@ -365,3 +359,4 @@ aes_dec_blk: mov %r0,(%ebp) pop %ebp ret +ENDPROC(aes_dec_blk) diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S index 5b577d5a059b..910565547163 100644 --- a/arch/x86/crypto/aes-x86_64-asm_64.S +++ b/arch/x86/crypto/aes-x86_64-asm_64.S @@ -15,6 +15,7 @@ .text +#include #include #define R1 %rax @@ -49,10 +50,8 @@ #define R11 %r11 #define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \ - .global FUNC; \ - .type FUNC,@function; \ - .align 8; \ -FUNC: movq r1,r2; \ + ENTRY(FUNC); \ + movq r1,r2; \ movq r3,r4; \ leaq KEY+48(r8),r9; \ movq r10,r11; \ @@ -71,14 +70,15 @@ FUNC: movq r1,r2; \ je B192; \ leaq 32(r9),r9; -#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \ +#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \ movq r1,r2; \ movq r3,r4; \ movl r5 ## E,(r9); \ movl r6 ## E,4(r9); \ movl r7 ## E,8(r9); \ movl r8 ## E,12(r9); \ - ret; + ret; \ + ENDPROC(FUNC); #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \ movzbl r2 ## H,r5 ## E; \ @@ -133,7 +133,7 @@ FUNC: movq r1,r2; \ #define entry(FUNC,KEY,B128,B192) \ prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11) -#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11) +#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11) #define encrypt_round(TAB,OFFSET) \ round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \ @@ -151,12 +151,12 @@ FUNC: movq r1,r2; \ /* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */ - entry(aes_enc_blk,0,enc128,enc192) + entry(aes_enc_blk,0,.Le128,.Le192) encrypt_round(crypto_ft_tab,-96) encrypt_round(crypto_ft_tab,-80) -enc192: encrypt_round(crypto_ft_tab,-64) +.Le192: encrypt_round(crypto_ft_tab,-64) encrypt_round(crypto_ft_tab,-48) -enc128: encrypt_round(crypto_ft_tab,-32) +.Le128: encrypt_round(crypto_ft_tab,-32) encrypt_round(crypto_ft_tab,-16) encrypt_round(crypto_ft_tab, 0) encrypt_round(crypto_ft_tab, 16) @@ -166,16 +166,16 @@ enc128: encrypt_round(crypto_ft_tab,-32) encrypt_round(crypto_ft_tab, 80) encrypt_round(crypto_ft_tab, 96) encrypt_final(crypto_fl_tab,112) - return + return(aes_enc_blk) /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ - entry(aes_dec_blk,240,dec128,dec192) + entry(aes_dec_blk,240,.Ld128,.Ld192) decrypt_round(crypto_it_tab,-96) decrypt_round(crypto_it_tab,-80) -dec192: decrypt_round(crypto_it_tab,-64) +.Ld192: decrypt_round(crypto_it_tab,-64) decrypt_round(crypto_it_tab,-48) -dec128: decrypt_round(crypto_it_tab,-32) +.Ld128: decrypt_round(crypto_it_tab,-32) decrypt_round(crypto_it_tab,-16) decrypt_round(crypto_it_tab, 0) decrypt_round(crypto_it_tab, 16) @@ -185,4 +185,4 @@ dec128: decrypt_round(crypto_it_tab,-32) decrypt_round(crypto_it_tab, 80) decrypt_round(crypto_it_tab, 96) decrypt_final(crypto_il_tab,112) - return + return(aes_dec_blk) -- cgit v1.2.3-59-g8ed1b From 8309b745bbaf3fe3df7fc67de51d0049b51452c5 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:38:55 +0200 Subject: crypto: aesni-intel - add ENDPROC statements for assembler functions Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 3470624d7835..04b797767b9e 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -1262,7 +1262,6 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst * poly = x^128 + x^127 + x^126 + x^121 + 1 * *****************************************************************************/ - ENTRY(aesni_gcm_dec) push %r12 push %r13 @@ -1437,6 +1436,7 @@ _return_T_done_decrypt: pop %r13 pop %r12 ret +ENDPROC(aesni_gcm_dec) /***************************************************************************** @@ -1700,10 +1700,12 @@ _return_T_done_encrypt: pop %r13 pop %r12 ret +ENDPROC(aesni_gcm_enc) #endif +.align 4 _key_expansion_128: _key_expansion_256a: pshufd $0b11111111, %xmm1, %xmm1 @@ -1715,6 +1717,8 @@ _key_expansion_256a: movaps %xmm0, (TKEYP) add $0x10, TKEYP ret +ENDPROC(_key_expansion_128) +ENDPROC(_key_expansion_256a) .align 4 _key_expansion_192a: @@ -1739,6 +1743,7 @@ _key_expansion_192a: movaps %xmm1, 0x10(TKEYP) add $0x20, TKEYP ret +ENDPROC(_key_expansion_192a) .align 4 _key_expansion_192b: @@ -1758,6 +1763,7 @@ _key_expansion_192b: movaps %xmm0, (TKEYP) add $0x10, TKEYP ret +ENDPROC(_key_expansion_192b) .align 4 _key_expansion_256b: @@ -1770,6 +1776,7 @@ _key_expansion_256b: movaps %xmm2, (TKEYP) add $0x10, TKEYP ret +ENDPROC(_key_expansion_256b) /* * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, @@ -1882,6 +1889,7 @@ ENTRY(aesni_set_key) popl KEYP #endif ret +ENDPROC(aesni_set_key) /* * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) @@ -1903,6 +1911,7 @@ ENTRY(aesni_enc) popl KEYP #endif ret +ENDPROC(aesni_enc) /* * _aesni_enc1: internal ABI @@ -1960,6 +1969,7 @@ _aesni_enc1: movaps 0x70(TKEYP), KEY AESENCLAST KEY STATE ret +ENDPROC(_aesni_enc1) /* * _aesni_enc4: internal ABI @@ -2068,6 +2078,7 @@ _aesni_enc4: AESENCLAST KEY STATE3 AESENCLAST KEY STATE4 ret +ENDPROC(_aesni_enc4) /* * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) @@ -2090,6 +2101,7 @@ ENTRY(aesni_dec) popl KEYP #endif ret +ENDPROC(aesni_dec) /* * _aesni_dec1: internal ABI @@ -2147,6 +2159,7 @@ _aesni_dec1: movaps 0x70(TKEYP), KEY AESDECLAST KEY STATE ret +ENDPROC(_aesni_dec1) /* * _aesni_dec4: internal ABI @@ -2255,6 +2268,7 @@ _aesni_dec4: AESDECLAST KEY STATE3 AESDECLAST KEY STATE4 ret +ENDPROC(_aesni_dec4) /* * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2312,6 +2326,7 @@ ENTRY(aesni_ecb_enc) popl LEN #endif ret +ENDPROC(aesni_ecb_enc) /* * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2370,6 +2385,7 @@ ENTRY(aesni_ecb_dec) popl LEN #endif ret +ENDPROC(aesni_ecb_dec) /* * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2411,6 +2427,7 @@ ENTRY(aesni_cbc_enc) popl IVP #endif ret +ENDPROC(aesni_cbc_enc) /* * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2501,6 +2518,7 @@ ENTRY(aesni_cbc_dec) popl IVP #endif ret +ENDPROC(aesni_cbc_dec) #ifdef __x86_64__ .align 16 @@ -2527,6 +2545,7 @@ _aesni_inc_init: MOVQ_R64_XMM TCTR_LOW INC MOVQ_R64_XMM CTR TCTR_LOW ret +ENDPROC(_aesni_inc_init) /* * _aesni_inc: internal ABI @@ -2555,6 +2574,7 @@ _aesni_inc: movaps CTR, IV PSHUFB_XMM BSWAP_MASK IV ret +ENDPROC(_aesni_inc) /* * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2615,4 +2635,5 @@ ENTRY(aesni_ctr_enc) movups IV, (IVP) .Lctr_enc_just_ret: ret +ENDPROC(aesni_ctr_enc) #endif -- cgit v1.2.3-59-g8ed1b From 5186e395fee266bede96b6906773973ed6fa2278 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:00 +0200 Subject: crypto: blowfish-x86_64: use ENTRY()/ENDPROC() for assembler functions and localize jump targets Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/blowfish-x86_64-asm_64.S | 39 ++++++++++++-------------------- 1 file changed, 14 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S index 391d245dc086..246c67006ed0 100644 --- a/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -20,6 +20,8 @@ * */ +#include + .file "blowfish-x86_64-asm.S" .text @@ -116,11 +118,7 @@ bswapq RX0; \ xorq RX0, (RIO); -.align 8 -.global __blowfish_enc_blk -.type __blowfish_enc_blk,@function; - -__blowfish_enc_blk: +ENTRY(__blowfish_enc_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -148,19 +146,16 @@ __blowfish_enc_blk: movq %r10, RIO; test %cl, %cl; - jnz __enc_xor; + jnz .L__enc_xor; write_block(); ret; -__enc_xor: +.L__enc_xor: xor_block(); ret; +ENDPROC(__blowfish_enc_blk) -.align 8 -.global blowfish_dec_blk -.type blowfish_dec_blk,@function; - -blowfish_dec_blk: +ENTRY(blowfish_dec_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -189,6 +184,7 @@ blowfish_dec_blk: movq %r11, %rbp; ret; +ENDPROC(blowfish_dec_blk) /********************************************************************** 4-way blowfish, four blocks parallel @@ -300,11 +296,7 @@ blowfish_dec_blk: bswapq RX3; \ xorq RX3, 24(RIO); -.align 8 -.global __blowfish_enc_blk_4way -.type __blowfish_enc_blk_4way,@function; - -__blowfish_enc_blk_4way: +ENTRY(__blowfish_enc_blk_4way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -336,7 +328,7 @@ __blowfish_enc_blk_4way: movq %r11, RIO; test %bpl, %bpl; - jnz __enc_xor4; + jnz .L__enc_xor4; write_block4(); @@ -344,18 +336,15 @@ __blowfish_enc_blk_4way: popq %rbp; ret; -__enc_xor4: +.L__enc_xor4: xor_block4(); popq %rbx; popq %rbp; ret; +ENDPROC(__blowfish_enc_blk_4way) -.align 8 -.global blowfish_dec_blk_4way -.type blowfish_dec_blk_4way,@function; - -blowfish_dec_blk_4way: +ENTRY(blowfish_dec_blk_4way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -387,4 +376,4 @@ blowfish_dec_blk_4way: popq %rbp; ret; - +ENDPROC(blowfish_dec_blk_4way) -- cgit v1.2.3-59-g8ed1b From 59990684b0d2b5ab57e37141412bc41cb6c9a2e9 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:05 +0200 Subject: crypto: camellia-x86_64/aes-ni: use ENTRY()/ENDPROC() for assembler functions and localize jump targets Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 38 ++++++++-------------- arch/x86/crypto/camellia-x86_64-asm_64.S | 50 +++++++++++++---------------- 2 files changed, 36 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index 2306d2e4816f..cfc163469c71 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -15,6 +15,8 @@ * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz */ +#include + #define CAMELLIA_TABLE_BYTE_LEN 272 /* struct camellia_ctx: */ @@ -190,6 +192,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd: %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %rcx, (%r9)); ret; +ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) .align 8 roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: @@ -197,6 +200,7 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, %rax, (%r9)); ret; +ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) /* * IN/OUT: @@ -709,8 +713,6 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: .text .align 8 -.type __camellia_enc_blk16,@function; - __camellia_enc_blk16: /* input: * %rdi: ctx, CTX @@ -793,10 +795,9 @@ __camellia_enc_blk16: %xmm15, %rax, %rcx, 24); jmp .Lenc_done; +ENDPROC(__camellia_enc_blk16) .align 8 -.type __camellia_dec_blk16,@function; - __camellia_dec_blk16: /* input: * %rdi: ctx, CTX @@ -877,12 +878,9 @@ __camellia_dec_blk16: ((key_table + (24) * 8) + 4)(CTX)); jmp .Ldec_max24; +ENDPROC(__camellia_dec_blk16) -.align 8 -.global camellia_ecb_enc_16way -.type camellia_ecb_enc_16way,@function; - -camellia_ecb_enc_16way: +ENTRY(camellia_ecb_enc_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -903,12 +901,9 @@ camellia_ecb_enc_16way: %xmm8, %rsi); ret; +ENDPROC(camellia_ecb_enc_16way) -.align 8 -.global camellia_ecb_dec_16way -.type camellia_ecb_dec_16way,@function; - -camellia_ecb_dec_16way: +ENTRY(camellia_ecb_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -934,12 +929,9 @@ camellia_ecb_dec_16way: %xmm8, %rsi); ret; +ENDPROC(camellia_ecb_dec_16way) -.align 8 -.global camellia_cbc_dec_16way -.type camellia_cbc_dec_16way,@function; - -camellia_cbc_dec_16way: +ENTRY(camellia_cbc_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -986,6 +978,7 @@ camellia_cbc_dec_16way: %xmm8, %rsi); ret; +ENDPROC(camellia_cbc_dec_16way) #define inc_le128(x, minus_one, tmp) \ vpcmpeqq minus_one, x, tmp; \ @@ -993,11 +986,7 @@ camellia_cbc_dec_16way: vpslldq $8, tmp, tmp; \ vpsubq tmp, x, x; -.align 8 -.global camellia_ctr_16way -.type camellia_ctr_16way,@function; - -camellia_ctr_16way: +ENTRY(camellia_ctr_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -1100,3 +1089,4 @@ camellia_ctr_16way: %xmm8, %rsi); ret; +ENDPROC(camellia_ctr_16way) diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S index 0b3374335fdc..310319c601ed 100644 --- a/arch/x86/crypto/camellia-x86_64-asm_64.S +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S @@ -20,6 +20,8 @@ * */ +#include + .file "camellia-x86_64-asm_64.S" .text @@ -188,10 +190,7 @@ bswapq RAB0; \ movq RAB0, 4*2(RIO); -.global __camellia_enc_blk; -.type __camellia_enc_blk,@function; - -__camellia_enc_blk: +ENTRY(__camellia_enc_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -214,33 +213,31 @@ __camellia_enc_blk: movl $24, RT1d; /* max */ cmpb $16, key_length(CTX); - je __enc_done; + je .L__enc_done; enc_fls(24); enc_rounds(24); movl $32, RT1d; /* max */ -__enc_done: +.L__enc_done: testb RXORbl, RXORbl; movq RDST, RIO; - jnz __enc_xor; + jnz .L__enc_xor; enc_outunpack(mov, RT1); movq RRBP, %rbp; ret; -__enc_xor: +.L__enc_xor: enc_outunpack(xor, RT1); movq RRBP, %rbp; ret; +ENDPROC(__camellia_enc_blk) -.global camellia_dec_blk; -.type camellia_dec_blk,@function; - -camellia_dec_blk: +ENTRY(camellia_dec_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -258,12 +255,12 @@ camellia_dec_blk: dec_inpack(RT2); cmpb $24, RT2bl; - je __dec_rounds16; + je .L__dec_rounds16; dec_rounds(24); dec_fls(24); -__dec_rounds16: +.L__dec_rounds16: dec_rounds(16); dec_fls(16); dec_rounds(8); @@ -276,6 +273,7 @@ __dec_rounds16: movq RRBP, %rbp; ret; +ENDPROC(camellia_dec_blk) /********************************************************************** 2-way camellia @@ -426,10 +424,7 @@ __dec_rounds16: bswapq RAB1; \ movq RAB1, 12*2(RIO); -.global __camellia_enc_blk_2way; -.type __camellia_enc_blk_2way,@function; - -__camellia_enc_blk_2way: +ENTRY(__camellia_enc_blk_2way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -453,16 +448,16 @@ __camellia_enc_blk_2way: movl $24, RT2d; /* max */ cmpb $16, key_length(CTX); - je __enc2_done; + je .L__enc2_done; enc_fls2(24); enc_rounds2(24); movl $32, RT2d; /* max */ -__enc2_done: +.L__enc2_done: test RXORbl, RXORbl; movq RDST, RIO; - jnz __enc2_xor; + jnz .L__enc2_xor; enc_outunpack2(mov, RT2); @@ -470,17 +465,15 @@ __enc2_done: popq %rbx; ret; -__enc2_xor: +.L__enc2_xor: enc_outunpack2(xor, RT2); movq RRBP, %rbp; popq %rbx; ret; +ENDPROC(__camellia_enc_blk_2way) -.global camellia_dec_blk_2way; -.type camellia_dec_blk_2way,@function; - -camellia_dec_blk_2way: +ENTRY(camellia_dec_blk_2way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -499,12 +492,12 @@ camellia_dec_blk_2way: dec_inpack2(RT2); cmpb $24, RT2bl; - je __dec2_rounds16; + je .L__dec2_rounds16; dec_rounds2(24); dec_fls2(24); -__dec2_rounds16: +.L__dec2_rounds16: dec_rounds2(16); dec_fls2(16); dec_rounds2(8); @@ -518,3 +511,4 @@ __dec2_rounds16: movq RRBP, %rbp; movq RXOR, %rbx; ret; +ENDPROC(camellia_dec_blk_2way) -- cgit v1.2.3-59-g8ed1b From e17e209ea44ae69bcfdcfacd6974cf48d04e6f71 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:11 +0200 Subject: crypto: cast5-avx: use ENTRY()/ENDPROC() for assembler functions and localize jump targets Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 48 ++++++++++++------------------- 1 file changed, 18 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index 15b00ac7cbd3..c35fd5d6ecd2 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -23,6 +23,8 @@ * */ +#include + .file "cast5-avx-x86_64-asm_64.S" .extern cast_s1 @@ -211,8 +213,6 @@ .text .align 16 -.type __cast5_enc_blk16,@function; - __cast5_enc_blk16: /* input: * %rdi: ctx, CTX @@ -263,14 +263,14 @@ __cast5_enc_blk16: movzbl rr(CTX), %eax; testl %eax, %eax; - jnz __skip_enc; + jnz .L__skip_enc; round(RL, RR, 12, 1); round(RR, RL, 13, 2); round(RL, RR, 14, 3); round(RR, RL, 15, 1); -__skip_enc: +.L__skip_enc: popq %rbx; popq %rbp; @@ -282,10 +282,9 @@ __skip_enc: outunpack_blocks(RR4, RL4, RTMP, RX, RKM); ret; +ENDPROC(__cast5_enc_blk16) .align 16 -.type __cast5_dec_blk16,@function; - __cast5_dec_blk16: /* input: * %rdi: ctx, CTX @@ -323,14 +322,14 @@ __cast5_dec_blk16: movzbl rr(CTX), %eax; testl %eax, %eax; - jnz __skip_dec; + jnz .L__skip_dec; round(RL, RR, 15, 1); round(RR, RL, 14, 3); round(RL, RR, 13, 2); round(RR, RL, 12, 1); -__dec_tail: +.L__dec_tail: round(RL, RR, 11, 3); round(RR, RL, 10, 2); round(RL, RR, 9, 1); @@ -355,15 +354,12 @@ __dec_tail: ret; -__skip_dec: +.L__skip_dec: vpsrldq $4, RKR, RKR; - jmp __dec_tail; + jmp .L__dec_tail; +ENDPROC(__cast5_dec_blk16) -.align 16 -.global cast5_ecb_enc_16way -.type cast5_ecb_enc_16way,@function; - -cast5_ecb_enc_16way: +ENTRY(cast5_ecb_enc_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -393,12 +389,9 @@ cast5_ecb_enc_16way: vmovdqu RL4, (7*4*4)(%r11); ret; +ENDPROC(cast5_ecb_enc_16way) -.align 16 -.global cast5_ecb_dec_16way -.type cast5_ecb_dec_16way,@function; - -cast5_ecb_dec_16way: +ENTRY(cast5_ecb_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -428,12 +421,9 @@ cast5_ecb_dec_16way: vmovdqu RL4, (7*4*4)(%r11); ret; +ENDPROC(cast5_ecb_dec_16way) -.align 16 -.global cast5_cbc_dec_16way -.type cast5_cbc_dec_16way,@function; - -cast5_cbc_dec_16way: +ENTRY(cast5_cbc_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -480,12 +470,9 @@ cast5_cbc_dec_16way: popq %r12; ret; +ENDPROC(cast5_cbc_dec_16way) -.align 16 -.global cast5_ctr_16way -.type cast5_ctr_16way,@function; - -cast5_ctr_16way: +ENTRY(cast5_ctr_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -556,3 +543,4 @@ cast5_ctr_16way: popq %r12; ret; +ENDPROC(cast5_ctr_16way) -- cgit v1.2.3-59-g8ed1b From 1985fecf019dae1db78c90ef9af435e1462e7766 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:16 +0200 Subject: crypto: cast6-avx: use ENTRY()/ENDPROC() for assembler functions Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 35 ++++++++++--------------------- 1 file changed, 11 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index 2569d0da841f..f93b6105a0ce 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -23,6 +23,7 @@ * */ +#include #include "glue_helper-asm-avx.S" .file "cast6-avx-x86_64-asm_64.S" @@ -250,8 +251,6 @@ .text .align 8 -.type __cast6_enc_blk8,@function; - __cast6_enc_blk8: /* input: * %rdi: ctx, CTX @@ -295,10 +294,9 @@ __cast6_enc_blk8: outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); ret; +ENDPROC(__cast6_enc_blk8) .align 8 -.type __cast6_dec_blk8,@function; - __cast6_dec_blk8: /* input: * %rdi: ctx, CTX @@ -341,12 +339,9 @@ __cast6_dec_blk8: outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); ret; +ENDPROC(__cast6_dec_blk8) -.align 8 -.global cast6_ecb_enc_8way -.type cast6_ecb_enc_8way,@function; - -cast6_ecb_enc_8way: +ENTRY(cast6_ecb_enc_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -362,12 +357,9 @@ cast6_ecb_enc_8way: store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; +ENDPROC(cast6_ecb_enc_8way) -.align 8 -.global cast6_ecb_dec_8way -.type cast6_ecb_dec_8way,@function; - -cast6_ecb_dec_8way: +ENTRY(cast6_ecb_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -383,12 +375,9 @@ cast6_ecb_dec_8way: store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; +ENDPROC(cast6_ecb_dec_8way) -.align 8 -.global cast6_cbc_dec_8way -.type cast6_cbc_dec_8way,@function; - -cast6_cbc_dec_8way: +ENTRY(cast6_cbc_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -409,12 +398,9 @@ cast6_cbc_dec_8way: popq %r12; ret; +ENDPROC(cast6_cbc_dec_8way) -.align 8 -.global cast6_ctr_8way -.type cast6_ctr_8way,@function; - -cast6_ctr_8way: +ENTRY(cast6_ctr_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -437,3 +423,4 @@ cast6_ctr_8way: popq %r12; ret; +ENDPROC(cast6_ctr_8way) -- cgit v1.2.3-59-g8ed1b From 698a5abbb0c15ac273bf02f55a1385725714353a Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:21 +0200 Subject: crypto: x86/crc32c - assembler clean-up: use ENTRY/ENDPROC Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 93c6d39237ac..cf1a7ec4cc3a 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -42,6 +42,8 @@ * SOFTWARE. */ +#include + ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction .macro LABEL prefix n @@ -68,8 +70,7 @@ # unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); -.global crc_pcl -crc_pcl: +ENTRY(crc_pcl) #define bufp %rdi #define bufp_dw %edi #define bufp_w %di @@ -323,6 +324,9 @@ JMPTBL_ENTRY %i .noaltmacro i=i+1 .endr + +ENDPROC(crc_pcl) + ################################################################ ## PCLMULQDQ tables ## Table is 128 entries x 2 quad words each -- cgit v1.2.3-59-g8ed1b From b05d3f375676e57672ac5a9090cb1068fab8b85f Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:26 +0200 Subject: crypto: x86/ghash - assembler clean-up: use ENDPROC at end of assember functions Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/ghash-clmulni-intel_asm.S | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 1eb7f90cb7b9..586f41aac361 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -94,6 +94,7 @@ __clmul_gf128mul_ble: pxor T2, T1 pxor T1, DATA ret +ENDPROC(__clmul_gf128mul_ble) /* void clmul_ghash_mul(char *dst, const be128 *shash) */ ENTRY(clmul_ghash_mul) @@ -105,6 +106,7 @@ ENTRY(clmul_ghash_mul) PSHUFB_XMM BSWAP DATA movups DATA, (%rdi) ret +ENDPROC(clmul_ghash_mul) /* * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, @@ -131,6 +133,7 @@ ENTRY(clmul_ghash_update) movups DATA, (%rdi) .Lupdate_just_ret: ret +ENDPROC(clmul_ghash_update) /* * void clmul_ghash_setkey(be128 *shash, const u8 *key); @@ -155,3 +158,4 @@ ENTRY(clmul_ghash_setkey) pxor %xmm1, %xmm0 movups %xmm0, (%rdi) ret +ENDPROC(clmul_ghash_setkey) -- cgit v1.2.3-59-g8ed1b From 044438082cf1447e37534b24beff723835464954 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:31 +0200 Subject: crypto: x86/salsa20 - assembler cleanup, use ENTRY/ENDPROC for assember functions and rename ECRYPT_* to salsa20_* Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/salsa20-i586-asm_32.S | 28 ++++++++++++++-------------- arch/x86/crypto/salsa20-x86_64-asm_64.S | 28 +++++++++++++--------------- arch/x86/crypto/salsa20_glue.c | 5 ----- 3 files changed, 27 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S index 72eb306680b2..329452b8f794 100644 --- a/arch/x86/crypto/salsa20-i586-asm_32.S +++ b/arch/x86/crypto/salsa20-i586-asm_32.S @@ -2,11 +2,12 @@ # D. J. Bernstein # Public domain. -# enter ECRYPT_encrypt_bytes +#include + .text -.p2align 5 -.globl ECRYPT_encrypt_bytes -ECRYPT_encrypt_bytes: + +# enter salsa20_encrypt_bytes +ENTRY(salsa20_encrypt_bytes) mov %esp,%eax and $31,%eax add $256,%eax @@ -933,11 +934,10 @@ ECRYPT_encrypt_bytes: add $64,%esi # goto bytesatleast1 jmp ._bytesatleast1 -# enter ECRYPT_keysetup -.text -.p2align 5 -.globl ECRYPT_keysetup -ECRYPT_keysetup: +ENDPROC(salsa20_encrypt_bytes) + +# enter salsa20_keysetup +ENTRY(salsa20_keysetup) mov %esp,%eax and $31,%eax add $256,%eax @@ -1060,11 +1060,10 @@ ECRYPT_keysetup: # leave add %eax,%esp ret -# enter ECRYPT_ivsetup -.text -.p2align 5 -.globl ECRYPT_ivsetup -ECRYPT_ivsetup: +ENDPROC(salsa20_keysetup) + +# enter salsa20_ivsetup +ENTRY(salsa20_ivsetup) mov %esp,%eax and $31,%eax add $256,%eax @@ -1112,3 +1111,4 @@ ECRYPT_ivsetup: # leave add %eax,%esp ret +ENDPROC(salsa20_ivsetup) diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S index 6214a9b09706..9279e0b2d60e 100644 --- a/arch/x86/crypto/salsa20-x86_64-asm_64.S +++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S @@ -1,8 +1,7 @@ -# enter ECRYPT_encrypt_bytes -.text -.p2align 5 -.globl ECRYPT_encrypt_bytes -ECRYPT_encrypt_bytes: +#include + +# enter salsa20_encrypt_bytes +ENTRY(salsa20_encrypt_bytes) mov %rsp,%r11 and $31,%r11 add $256,%r11 @@ -802,11 +801,10 @@ ECRYPT_encrypt_bytes: # comment:fp stack unchanged by jump # goto bytesatleast1 jmp ._bytesatleast1 -# enter ECRYPT_keysetup -.text -.p2align 5 -.globl ECRYPT_keysetup -ECRYPT_keysetup: +ENDPROC(salsa20_encrypt_bytes) + +# enter salsa20_keysetup +ENTRY(salsa20_keysetup) mov %rsp,%r11 and $31,%r11 add $256,%r11 @@ -892,11 +890,10 @@ ECRYPT_keysetup: mov %rdi,%rax mov %rsi,%rdx ret -# enter ECRYPT_ivsetup -.text -.p2align 5 -.globl ECRYPT_ivsetup -ECRYPT_ivsetup: +ENDPROC(salsa20_keysetup) + +# enter salsa20_ivsetup +ENTRY(salsa20_ivsetup) mov %rsp,%r11 and $31,%r11 add $256,%r11 @@ -918,3 +915,4 @@ ECRYPT_ivsetup: mov %rdi,%rax mov %rsi,%rdx ret +ENDPROC(salsa20_ivsetup) diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index a3a3c0205c16..5e8e67739bb5 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c @@ -26,11 +26,6 @@ #define SALSA20_MIN_KEY_SIZE 16U #define SALSA20_MAX_KEY_SIZE 32U -// use the ECRYPT_* function names -#define salsa20_keysetup ECRYPT_keysetup -#define salsa20_ivsetup ECRYPT_ivsetup -#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes - struct salsa20_ctx { u32 input[16]; -- cgit v1.2.3-59-g8ed1b From 2dcfd44dee3fd3a63e3e3d3f5cbfd2436d1f98a6 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:36 +0200 Subject: crypto: x86/serpent - use ENTRY/ENDPROC for assember functions and localize jump targets Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 35 +++++++++------------------- arch/x86/crypto/serpent-sse2-i586-asm_32.S | 20 +++++++--------- arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | 20 +++++++--------- 3 files changed, 27 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 02b0e9fe997c..43c938612b74 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -24,6 +24,7 @@ * */ +#include #include "glue_helper-asm-avx.S" .file "serpent-avx-x86_64-asm_64.S" @@ -566,8 +567,6 @@ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) .align 8 -.type __serpent_enc_blk8_avx,@function; - __serpent_enc_blk8_avx: /* input: * %rdi: ctx, CTX @@ -619,10 +618,9 @@ __serpent_enc_blk8_avx: write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); ret; +ENDPROC(__serpent_enc_blk8_avx) .align 8 -.type __serpent_dec_blk8_avx,@function; - __serpent_dec_blk8_avx: /* input: * %rdi: ctx, CTX @@ -674,12 +672,9 @@ __serpent_dec_blk8_avx: write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); ret; +ENDPROC(__serpent_dec_blk8_avx) -.align 8 -.global serpent_ecb_enc_8way_avx -.type serpent_ecb_enc_8way_avx,@function; - -serpent_ecb_enc_8way_avx: +ENTRY(serpent_ecb_enc_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -693,12 +688,9 @@ serpent_ecb_enc_8way_avx: store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; +ENDPROC(serpent_ecb_enc_8way_avx) -.align 8 -.global serpent_ecb_dec_8way_avx -.type serpent_ecb_dec_8way_avx,@function; - -serpent_ecb_dec_8way_avx: +ENTRY(serpent_ecb_dec_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -712,12 +704,9 @@ serpent_ecb_dec_8way_avx: store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); ret; +ENDPROC(serpent_ecb_dec_8way_avx) -.align 8 -.global serpent_cbc_dec_8way_avx -.type serpent_cbc_dec_8way_avx,@function; - -serpent_cbc_dec_8way_avx: +ENTRY(serpent_cbc_dec_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -731,12 +720,9 @@ serpent_cbc_dec_8way_avx: store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); ret; +ENDPROC(serpent_cbc_dec_8way_avx) -.align 8 -.global serpent_ctr_8way_avx -.type serpent_ctr_8way_avx,@function; - -serpent_ctr_8way_avx: +ENTRY(serpent_ctr_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -752,3 +738,4 @@ serpent_ctr_8way_avx: store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; +ENDPROC(serpent_ctr_8way_avx) diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S index c00053d42f99..d348f1553a79 100644 --- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S +++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S @@ -24,6 +24,8 @@ * */ +#include + .file "serpent-sse2-i586-asm_32.S" .text @@ -510,11 +512,7 @@ pxor t0, x3; \ movdqu x3, (3*4*4)(out); -.align 8 -.global __serpent_enc_blk_4way -.type __serpent_enc_blk_4way,@function; - -__serpent_enc_blk_4way: +ENTRY(__serpent_enc_blk_4way) /* input: * arg_ctx(%esp): ctx, CTX * arg_dst(%esp): dst @@ -566,22 +564,19 @@ __serpent_enc_blk_4way: movl arg_dst(%esp), %eax; cmpb $0, arg_xor(%esp); - jnz __enc_xor4; + jnz .L__enc_xor4; write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); ret; -__enc_xor4: +.L__enc_xor4: xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); ret; +ENDPROC(__serpent_enc_blk_4way) -.align 8 -.global serpent_dec_blk_4way -.type serpent_dec_blk_4way,@function; - -serpent_dec_blk_4way: +ENTRY(serpent_dec_blk_4way) /* input: * arg_ctx(%esp): ctx, CTX * arg_dst(%esp): dst @@ -633,3 +628,4 @@ serpent_dec_blk_4way: write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); ret; +ENDPROC(serpent_dec_blk_4way) diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S index 3ee1ff04d3e9..acc066c7c6b2 100644 --- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S @@ -24,6 +24,8 @@ * */ +#include + .file "serpent-sse2-x86_64-asm_64.S" .text @@ -632,11 +634,7 @@ pxor t0, x3; \ movdqu x3, (3*4*4)(out); -.align 8 -.global __serpent_enc_blk_8way -.type __serpent_enc_blk_8way,@function; - -__serpent_enc_blk_8way: +ENTRY(__serpent_enc_blk_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -687,24 +685,21 @@ __serpent_enc_blk_8way: leaq (4*4*4)(%rsi), %rax; testb %cl, %cl; - jnz __enc_xor8; + jnz .L__enc_xor8; write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); ret; -__enc_xor8: +.L__enc_xor8: xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); ret; +ENDPROC(__serpent_enc_blk_8way) -.align 8 -.global serpent_dec_blk_8way -.type serpent_dec_blk_8way,@function; - -serpent_dec_blk_8way: +ENTRY(serpent_dec_blk_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -756,3 +751,4 @@ serpent_dec_blk_8way: write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); ret; +ENDPROC(serpent_dec_blk_8way) -- cgit v1.2.3-59-g8ed1b From ac9d55dd42858b127bedb84f4e59789958263d37 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:41 +0200 Subject: crypto: x86/sha1 - assembler clean-ups: use ENTRY/ENDPROC Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_ssse3_asm.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index 49d6987a73d9..a4109506a5e8 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -28,6 +28,8 @@ * (at your option) any later version. */ +#include + #define CTX %rdi // arg1 #define BUF %rsi // arg2 #define CNT %rdx // arg3 @@ -69,10 +71,8 @@ * param: function's name */ .macro SHA1_VECTOR_ASM name - .global \name - .type \name, @function - .align 32 -\name: + ENTRY(\name) + push %rbx push %rbp push %r12 @@ -106,7 +106,7 @@ pop %rbx ret - .size \name, .-\name + ENDPROC(\name) .endm /* -- cgit v1.2.3-59-g8ed1b From d3f5188dfea70e7ea6570bd4bc9d6d7dbd431e39 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:46 +0200 Subject: crypto: x86/twofish - assembler clean-ups: use ENTRY/ENDPROC, localize jump labels Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 35 +++++++++------------------- arch/x86/crypto/twofish-i586-asm_32.S | 11 ++++----- arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 20 +++++++--------- arch/x86/crypto/twofish-x86_64-asm_64.S | 11 ++++----- 4 files changed, 29 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index ebac16bfa830..8d3e113b2c95 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -23,6 +23,7 @@ * */ +#include #include "glue_helper-asm-avx.S" .file "twofish-avx-x86_64-asm_64.S" @@ -243,8 +244,6 @@ vpxor x3, wkey, x3; .align 8 -.type __twofish_enc_blk8,@function; - __twofish_enc_blk8: /* input: * %rdi: ctx, CTX @@ -284,10 +283,9 @@ __twofish_enc_blk8: outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); ret; +ENDPROC(__twofish_enc_blk8) .align 8 -.type __twofish_dec_blk8,@function; - __twofish_dec_blk8: /* input: * %rdi: ctx, CTX @@ -325,12 +323,9 @@ __twofish_dec_blk8: outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); ret; +ENDPROC(__twofish_dec_blk8) -.align 8 -.global twofish_ecb_enc_8way -.type twofish_ecb_enc_8way,@function; - -twofish_ecb_enc_8way: +ENTRY(twofish_ecb_enc_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -346,12 +341,9 @@ twofish_ecb_enc_8way: store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); ret; +ENDPROC(twofish_ecb_enc_8way) -.align 8 -.global twofish_ecb_dec_8way -.type twofish_ecb_dec_8way,@function; - -twofish_ecb_dec_8way: +ENTRY(twofish_ecb_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -367,12 +359,9 @@ twofish_ecb_dec_8way: store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; +ENDPROC(twofish_ecb_dec_8way) -.align 8 -.global twofish_cbc_dec_8way -.type twofish_cbc_dec_8way,@function; - -twofish_cbc_dec_8way: +ENTRY(twofish_cbc_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -393,12 +382,9 @@ twofish_cbc_dec_8way: popq %r12; ret; +ENDPROC(twofish_cbc_dec_8way) -.align 8 -.global twofish_ctr_8way -.type twofish_ctr_8way,@function; - -twofish_ctr_8way: +ENTRY(twofish_ctr_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -421,3 +407,4 @@ twofish_ctr_8way: popq %r12; ret; +ENDPROC(twofish_ctr_8way) diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S index 658af4bb35c9..694ea4587ba7 100644 --- a/arch/x86/crypto/twofish-i586-asm_32.S +++ b/arch/x86/crypto/twofish-i586-asm_32.S @@ -20,6 +20,7 @@ .file "twofish-i586-asm.S" .text +#include #include /* return address at 0 */ @@ -219,11 +220,7 @@ xor %esi, d ## D;\ ror $1, d ## D; -.align 4 -.global twofish_enc_blk -.global twofish_dec_blk - -twofish_enc_blk: +ENTRY(twofish_enc_blk) push %ebp /* save registers according to calling convention*/ push %ebx push %esi @@ -277,8 +274,9 @@ twofish_enc_blk: pop %ebp mov $1, %eax ret +ENDPROC(twofish_enc_blk) -twofish_dec_blk: +ENTRY(twofish_dec_blk) push %ebp /* save registers according to calling convention*/ push %ebx push %esi @@ -333,3 +331,4 @@ twofish_dec_blk: pop %ebp mov $1, %eax ret +ENDPROC(twofish_dec_blk) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S index 5b012a2c5119..1c3b7ceb36d2 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S @@ -20,6 +20,8 @@ * */ +#include + .file "twofish-x86_64-asm-3way.S" .text @@ -214,11 +216,7 @@ rorq $32, RAB2; \ outunpack3(mov, RIO, 2, RAB, 2); -.align 8 -.global __twofish_enc_blk_3way -.type __twofish_enc_blk_3way,@function; - -__twofish_enc_blk_3way: +ENTRY(__twofish_enc_blk_3way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -250,7 +248,7 @@ __twofish_enc_blk_3way: popq %rbp; /* bool xor */ testb %bpl, %bpl; - jnz __enc_xor3; + jnz .L__enc_xor3; outunpack_enc3(mov); @@ -262,7 +260,7 @@ __twofish_enc_blk_3way: popq %r15; ret; -__enc_xor3: +.L__enc_xor3: outunpack_enc3(xor); popq %rbx; @@ -272,11 +270,9 @@ __enc_xor3: popq %r14; popq %r15; ret; +ENDPROC(__twofish_enc_blk_3way) -.global twofish_dec_blk_3way -.type twofish_dec_blk_3way,@function; - -twofish_dec_blk_3way: +ENTRY(twofish_dec_blk_3way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -313,4 +309,4 @@ twofish_dec_blk_3way: popq %r14; popq %r15; ret; - +ENDPROC(twofish_dec_blk_3way) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S index 7bcf3fcc3668..a039d21986a2 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S @@ -20,6 +20,7 @@ .file "twofish-x86_64-asm.S" .text +#include #include #define a_offset 0 @@ -214,11 +215,7 @@ xor %r8d, d ## D;\ ror $1, d ## D; -.align 8 -.global twofish_enc_blk -.global twofish_dec_blk - -twofish_enc_blk: +ENTRY(twofish_enc_blk) pushq R1 /* %rdi contains the ctx address */ @@ -269,8 +266,9 @@ twofish_enc_blk: popq R1 movq $1,%rax ret +ENDPROC(twofish_enc_blk) -twofish_dec_blk: +ENTRY(twofish_dec_blk) pushq R1 /* %rdi contains the ctx address */ @@ -320,3 +318,4 @@ twofish_dec_blk: popq R1 movq $1,%rax ret +ENDPROC(twofish_dec_blk) -- cgit v1.2.3-59-g8ed1b From 7983627657db5e37594af5c28cdb623855eb554f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 20 Jan 2013 18:05:02 +1100 Subject: crypto: crc32-pclmul - Kill warning on x86-32 This patch removes a gratuitous warning on x86-32: arch/x86/crypto/crc32-pclmul_asm.S:87:2: warning: #warning Using 32bit code support [-Wcpp] Signed-off-by: Herbert Xu --- arch/x86/crypto/crc32-pclmul_asm.S | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S index 65ea6a624907..c8335014a044 100644 --- a/arch/x86/crypto/crc32-pclmul_asm.S +++ b/arch/x86/crypto/crc32-pclmul_asm.S @@ -84,7 +84,6 @@ #define LEN %rsi #define CRC %edx #else -#warning Using 32bit code support #define BUF %eax #define LEN %edx #define CRC %ecx -- cgit v1.2.3-59-g8ed1b