about | summary | refs | log | tree | commit | diff | stats | homepage
diff options
context:
space:
mode:
author: Jason A. Donenfeld <Jason@zx2c4.com> 2017-12-10 22:58:16 +0100
committer: Jason A. Donenfeld <Jason@zx2c4.com> 2017-12-11 00:25:22 +0100
commit: bbe2f94acf37c111f86f2583aa3e390b6c0fc279 (patch)
tree: a57695405ac817893a678296f325089bb1643ac1
parent: chacha20: avx512vl implementation (diff)
download: wireguard-monolithic-historical-bbe2f94acf37c111f86f2583aa3e390b6c0fc279.tar.xz
download: wireguard-monolithic-historical-bbe2f94acf37c111f86f2583aa3e390b6c0fc279.zip
chacha20poly1305: wire up avx512vl for skylake-x
-rw-r--r-- src/compat/Kbuild.include 4
-rw-r--r-- src/compat/compat.h 7
-rw-r--r-- src/compat/intel-family/include/asm/intel-family.h 73
-rw-r--r-- src/crypto/blake2s.c 2
-rw-r--r-- src/crypto/chacha20poly1305.c 19
5 files changed, 99 insertions, 6 deletions
diff --git a/src/compat/Kbuild.include b/src/compat/Kbuild.include
index b7930b6..ce319d6 100644
--- a/src/compat/Kbuild.include
+++ b/src/compat/Kbuild.include
@@ -24,6 +24,10 @@ ccflags-y += -I$(src)/compat/dst_cache/include
wireguard-y += compat/dst_cache/dst_cache.o
endif
+ifeq ($(wildcard $(srctree)/arch/x86/include/asm/intel-family.h),)
+ccflags-y += -I$(src)/compat/intel-family/include
+endif
+
ifeq ($(wildcard $(srctree)/arch/x86/include/asm/fpu/api.h),)
ccflags-y += -I$(src)/compat/fpu/include
endif
diff --git a/src/compat/compat.h b/src/compat/compat.h
index f4716d1..859d417 100644
--- a/src/compat/compat.h
+++ b/src/compat/compat.h
@@ -530,8 +530,11 @@ static inline int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_n
#ifndef XFEATURE_MASK_SSE
#define XFEATURE_MASK_SSE XSTATE_SSE
#endif
-#ifndef XFEATURE_MASK_ZMM_Hi256
-#define XFEATURE_MASK_ZMM_Hi256 XSTATE_ZMM_Hi256
+#ifndef XSTATE_AVX512
+#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
+#endif
+#ifndef XFEATURE_MASK_AVX512
+#define XFEATURE_MASK_AVX512 XSTATE_AVX512
#endif
#endif
diff --git a/src/compat/intel-family/include/asm/intel-family.h b/src/compat/intel-family/include/asm/intel-family.h
new file mode 100644
index 0000000..35a6bc4
--- /dev/null
+++ b/src/compat/intel-family/include/asm/intel-family.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_INTEL_FAMILY_H
+#define _ASM_X86_INTEL_FAMILY_H
+
+/*
+ * "Big Core" Processors (Branded as Core, Xeon, etc...)
+ *
+ * The "_X" parts are generally the EP and EX Xeons, or the
+ * "Extreme" ones, like Broadwell-E.
+ *
+ * Things ending in "2" are usually because we have no better
+ * name for them. There's no processor called "SILVERMONT2".
+ */
+
+#define INTEL_FAM6_CORE_YONAH 0x0E
+
+#define INTEL_FAM6_CORE2_MEROM 0x0F
+#define INTEL_FAM6_CORE2_MEROM_L 0x16
+#define INTEL_FAM6_CORE2_PENRYN 0x17
+#define INTEL_FAM6_CORE2_DUNNINGTON 0x1D
+
+#define INTEL_FAM6_NEHALEM 0x1E
+#define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */
+#define INTEL_FAM6_NEHALEM_EP 0x1A
+#define INTEL_FAM6_NEHALEM_EX 0x2E
+
+#define INTEL_FAM6_WESTMERE 0x25
+#define INTEL_FAM6_WESTMERE_EP 0x2C
+#define INTEL_FAM6_WESTMERE_EX 0x2F
+
+#define INTEL_FAM6_SANDYBRIDGE 0x2A
+#define INTEL_FAM6_SANDYBRIDGE_X 0x2D
+#define INTEL_FAM6_IVYBRIDGE 0x3A
+#define INTEL_FAM6_IVYBRIDGE_X 0x3E
+
+#define INTEL_FAM6_HASWELL_CORE 0x3C
+#define INTEL_FAM6_HASWELL_X 0x3F
+#define INTEL_FAM6_HASWELL_ULT 0x45
+#define INTEL_FAM6_HASWELL_GT3E 0x46
+
+#define INTEL_FAM6_BROADWELL_CORE 0x3D
+#define INTEL_FAM6_BROADWELL_GT3E 0x47
+#define INTEL_FAM6_BROADWELL_X 0x4F
+#define INTEL_FAM6_BROADWELL_XEON_D 0x56
+
+#define INTEL_FAM6_SKYLAKE_MOBILE 0x4E
+#define INTEL_FAM6_SKYLAKE_DESKTOP 0x5E
+#define INTEL_FAM6_SKYLAKE_X 0x55
+#define INTEL_FAM6_KABYLAKE_MOBILE 0x8E
+#define INTEL_FAM6_KABYLAKE_DESKTOP 0x9E
+
+/* "Small Core" Processors (Atom) */
+
+#define INTEL_FAM6_ATOM_PINEVIEW 0x1C
+#define INTEL_FAM6_ATOM_LINCROFT 0x26
+#define INTEL_FAM6_ATOM_PENWELL 0x27
+#define INTEL_FAM6_ATOM_CLOVERVIEW 0x35
+#define INTEL_FAM6_ATOM_CEDARVIEW 0x36
+#define INTEL_FAM6_ATOM_SILVERMONT1 0x37 /* BayTrail/BYT / Valleyview */
+#define INTEL_FAM6_ATOM_SILVERMONT2 0x4D /* Avoton/Rangely */
+#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* CherryTrail / Braswell */
+#define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */
+#define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */
+#define INTEL_FAM6_ATOM_GOLDMONT 0x5C
+#define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */
+#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A
+
+/* Xeon Phi */
+
+#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
+#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
+
+#endif /* _ASM_X86_INTEL_FAMILY_H */
diff --git a/src/crypto/blake2s.c b/src/crypto/blake2s.c
index 1f4052d..c0b95be 100644
--- a/src/crypto/blake2s.c
+++ b/src/crypto/blake2s.c
@@ -121,7 +121,7 @@ void __init blake2s_fpu_init(void)
{
blake2s_use_avx = boot_cpu_has(X86_FEATURE_AVX) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#ifndef COMPAT_CANNOT_USE_AVX512
- blake2s_use_avx512 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512VL) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_ZMM_Hi256, NULL);
+ blake2s_use_avx512 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512VL) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL);
#endif
}
#ifdef CONFIG_AS_AVX
diff --git a/src/crypto/chacha20poly1305.c b/src/crypto/chacha20poly1305.c
index bde923b..4173973 100644
--- a/src/crypto/chacha20poly1305.c
+++ b/src/crypto/chacha20poly1305.c
@@ -16,6 +16,7 @@
#if defined(CONFIG_X86_64)
#include <asm/cpufeature.h>
#include <asm/processor.h>
+#include <asm/intel-family.h>
asmlinkage void poly1305_init_x86_64(void *ctx, const u8 key[16]);
asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp, size_t len, u32 padbit);
asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[16], const u32 nonce[4]);
@@ -41,14 +42,21 @@ static bool chacha20poly1305_use_ssse3 __read_mostly;
static bool chacha20poly1305_use_avx __read_mostly;
static bool chacha20poly1305_use_avx2 __read_mostly;
static bool chacha20poly1305_use_avx512 __read_mostly;
+static bool chacha20poly1305_use_avx512vl __read_mostly;
void __init chacha20poly1305_fpu_init(void)
{
chacha20poly1305_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3);
- chacha20poly1305_use_avx = boot_cpu_has(X86_FEATURE_AVX) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
- chacha20poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
+ chacha20poly1305_use_avx = boot_cpu_has(X86_FEATURE_AVX) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
+ chacha20poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#ifndef COMPAT_CANNOT_USE_AVX512
- chacha20poly1305_use_avx512 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_ZMM_Hi256, NULL);
+ chacha20poly1305_use_avx512 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) &&
+ boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X;
+ chacha20poly1305_use_avx512vl = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX512F) && boot_cpu_has(X86_FEATURE_AVX512VL) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL);
#endif
}
#elif defined(CONFIG_ARM) || defined(CONFIG_ARM64)
@@ -228,6 +236,11 @@ static void chacha20_crypt(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, u32
ctx->state[12] += (bytes + 63) / 64;
return;
}
+ if (chacha20poly1305_use_avx512vl) {
+ chacha20_avx512vl(dst, src, bytes, &ctx->state[4], &ctx->state[12]);
+ ctx->state[12] += (bytes + 63) / 64;
+ return;
+ }
#endif
#ifdef CONFIG_AS_AVX2
if (chacha20poly1305_use_avx2) {