From bbe2f94acf37c111f86f2583aa3e390b6c0fc279 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Sun, 10 Dec 2017 22:58:16 +0100 Subject: chacha20poly1305: wire up avx512vl for skylake-x --- src/compat/Kbuild.include | 4 ++ src/compat/compat.h | 7 ++- src/compat/intel-family/include/asm/intel-family.h | 73 ++++++++++++++++++++++ src/crypto/blake2s.c | 2 +- src/crypto/chacha20poly1305.c | 19 +++++- 5 files changed, 99 insertions(+), 6 deletions(-) create mode 100644 src/compat/intel-family/include/asm/intel-family.h diff --git a/src/compat/Kbuild.include b/src/compat/Kbuild.include index b7930b6..ce319d6 100644 --- a/src/compat/Kbuild.include +++ b/src/compat/Kbuild.include @@ -24,6 +24,10 @@ ccflags-y += -I$(src)/compat/dst_cache/include wireguard-y += compat/dst_cache/dst_cache.o endif +ifeq ($(wildcard $(srctree)/arch/x86/include/asm/intel-family.h),) +ccflags-y += -I$(src)/compat/intel-family/include +endif + ifeq ($(wildcard $(srctree)/arch/x86/include/asm/fpu/api.h),) ccflags-y += -I$(src)/compat/fpu/include endif diff --git a/src/compat/compat.h b/src/compat/compat.h index f4716d1..859d417 100644 --- a/src/compat/compat.h +++ b/src/compat/compat.h @@ -530,8 +530,11 @@ static inline int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_n #ifndef XFEATURE_MASK_SSE #define XFEATURE_MASK_SSE XSTATE_SSE #endif -#ifndef XFEATURE_MASK_ZMM_Hi256 -#define XFEATURE_MASK_ZMM_Hi256 XSTATE_ZMM_Hi256 +#ifndef XSTATE_AVX512 +#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) +#endif +#ifndef XFEATURE_MASK_AVX512 +#define XFEATURE_MASK_AVX512 XSTATE_AVX512 #endif #endif diff --git a/src/compat/intel-family/include/asm/intel-family.h b/src/compat/intel-family/include/asm/intel-family.h new file mode 100644 index 0000000..35a6bc4 --- /dev/null +++ b/src/compat/intel-family/include/asm/intel-family.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INTEL_FAMILY_H +#define _ASM_X86_INTEL_FAMILY_H + +/* + * "Big Core" Processors (Branded as Core, Xeon, etc...) + * + * The "_X" parts are generally the EP and EX Xeons, or the + * "Extreme" ones, like Broadwell-E. + * + * Things ending in "2" are usually because we have no better + * name for them. There's no processor called "SILVERMONT2". + */ + +#define INTEL_FAM6_CORE_YONAH 0x0E + +#define INTEL_FAM6_CORE2_MEROM 0x0F +#define INTEL_FAM6_CORE2_MEROM_L 0x16 +#define INTEL_FAM6_CORE2_PENRYN 0x17 +#define INTEL_FAM6_CORE2_DUNNINGTON 0x1D + +#define INTEL_FAM6_NEHALEM 0x1E +#define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */ +#define INTEL_FAM6_NEHALEM_EP 0x1A +#define INTEL_FAM6_NEHALEM_EX 0x2E + +#define INTEL_FAM6_WESTMERE 0x25 +#define INTEL_FAM6_WESTMERE_EP 0x2C +#define INTEL_FAM6_WESTMERE_EX 0x2F + +#define INTEL_FAM6_SANDYBRIDGE 0x2A +#define INTEL_FAM6_SANDYBRIDGE_X 0x2D +#define INTEL_FAM6_IVYBRIDGE 0x3A +#define INTEL_FAM6_IVYBRIDGE_X 0x3E + +#define INTEL_FAM6_HASWELL_CORE 0x3C +#define INTEL_FAM6_HASWELL_X 0x3F +#define INTEL_FAM6_HASWELL_ULT 0x45 +#define INTEL_FAM6_HASWELL_GT3E 0x46 + +#define INTEL_FAM6_BROADWELL_CORE 0x3D +#define INTEL_FAM6_BROADWELL_GT3E 0x47 +#define INTEL_FAM6_BROADWELL_X 0x4F +#define INTEL_FAM6_BROADWELL_XEON_D 0x56 + +#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E +#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E +#define INTEL_FAM6_SKYLAKE_X 0x55 +#define INTEL_FAM6_KABYLAKE_MOBILE 0x8E +#define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E + +/* "Small Core" Processors (Atom) */ + +#define INTEL_FAM6_ATOM_PINEVIEW 0x1C +#define INTEL_FAM6_ATOM_LINCROFT 0x26 +#define INTEL_FAM6_ATOM_PENWELL 0x27 +#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35 +#define INTEL_FAM6_ATOM_CEDARVIEW 0x36 +#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */ +#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avaton/Rangely */ +#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */ +#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ +#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ +#define INTEL_FAM6_ATOM_GOLDMONT 0x5C +#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ +#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A + +/* Xeon Phi */ + +#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ +#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ + +#endif /* _ASM_X86_INTEL_FAMILY_H */ diff --git a/src/crypto/blake2s.c b/src/crypto/blake2s.c index 1f4052d..c0b95be 100644 --- a/src/crypto/blake2s.c +++ b/src/crypto/blake2s.c @@ -121,7 +121,7 @@ void __init blake2s_fpu_init(void) { blake2s_use_avx = boot_cpu_has(X86_FEATURE_AVX) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); #ifndef COMPAT_CANNOT_USE_AVX512 - blake2s_use_avx512 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512VL) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_ZMM_Hi256, NULL); + blake2s_use_avx512 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512VL) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL); #endif } #ifdef CONFIG_AS_AVX diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/chacha20poly1305.c index bde923b..4173973 100644 --- a/src/crypto/chacha20poly1305.c +++ b/src/crypto/chacha20poly1305.c @@ -16,6 +16,7 @@ #if defined(CONFIG_X86_64) #include #include +#include asmlinkage void poly1305_init_x86_64(void *ctx, const u8 key[16]); asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, size_t len, u32 padbit); asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[16], const u32 nonce[4]); @@ -41,14 +42,21 @@ static bool chacha20poly1305_use_ssse3 __read_mostly; static bool chacha20poly1305_use_avx __read_mostly; static bool chacha20poly1305_use_avx2 __read_mostly; static bool chacha20poly1305_use_avx512 __read_mostly; +static bool chacha20poly1305_use_avx512vl __read_mostly; void __init chacha20poly1305_fpu_init(void) { chacha20poly1305_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3); - chacha20poly1305_use_avx = boot_cpu_has(X86_FEATURE_AVX) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); - chacha20poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); + chacha20poly1305_use_avx = boot_cpu_has(X86_FEATURE_AVX) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); + chacha20poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); #ifndef COMPAT_CANNOT_USE_AVX512 - chacha20poly1305_use_avx512 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_ZMM_Hi256, NULL); + chacha20poly1305_use_avx512 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) && + boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X; + chacha20poly1305_use_avx512vl = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512VL) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL); #endif } #elif defined(CONFIG_ARM) || defined(CONFIG_ARM64) @@ -228,6 +236,11 @@ static void chacha20_crypt(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32 ctx->state[12] += (bytes + 63) / 64; return; } + if (chacha20poly1305_use_avx512vl) { + chacha20_avx512vl(dst, src, bytes, &ctx->state[4], &ctx->state[12]); + ctx->state[12] += (bytes + 63) / 64; + return; + } #endif #ifdef CONFIG_AS_AVX2 if (chacha20poly1305_use_avx2) { -- cgit v1.2.3-59-g8ed1b