From ad5e42105059b23545b6958b3d8d4a65e7c78f01 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Fri, 5 Oct 2018 23:20:40 +0200
Subject: global: rename include'd C files to be .c

This is done by 259 other files in the kernel tree:

    linux $ rg '#include.*\.c' -l | wc -l
    259

Suggested-by: Sultan Alsawaf <sultanxda@gmail.com>
---
 src/crypto/zinc/poly1305/poly1305-arm-glue.c    | 139 +++++++++++++++++++++
 src/crypto/zinc/poly1305/poly1305-arm-glue.h    | 139 ---------------------
 src/crypto/zinc/poly1305/poly1305-mips-glue.c   |  36 ++++++
 src/crypto/zinc/poly1305/poly1305-mips-glue.h   |  36 ------
 src/crypto/zinc/poly1305/poly1305-x86_64-glue.c | 154 ++++++++++++++++++++++++
 src/crypto/zinc/poly1305/poly1305-x86_64-glue.h | 154 ------------------------
 src/crypto/zinc/poly1305/poly1305.c             |   8 +-
 7 files changed, 333 insertions(+), 333 deletions(-)
 create mode 100644 src/crypto/zinc/poly1305/poly1305-arm-glue.c
 delete mode 100644 src/crypto/zinc/poly1305/poly1305-arm-glue.h
 create mode 100644 src/crypto/zinc/poly1305/poly1305-mips-glue.c
 delete mode 100644 src/crypto/zinc/poly1305/poly1305-mips-glue.h
 create mode 100644 src/crypto/zinc/poly1305/poly1305-x86_64-glue.c
 delete mode 100644 src/crypto/zinc/poly1305/poly1305-x86_64-glue.h

(limited to 'src/crypto/zinc/poly1305')

diff --git a/src/crypto/zinc/poly1305/poly1305-arm-glue.c b/src/crypto/zinc/poly1305/poly1305-arm-glue.c
new file mode 100644
index 0000000..f91066f
--- /dev/null
+++ b/src/crypto/zinc/poly1305/poly1305-arm-glue.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+
+asmlinkage void poly1305_init_arm(void *ctx, const u8 key[16]);
+asmlinkage void poly1305_blocks_arm(void *ctx, const u8 *inp, const size_t len,
+				    const u32 padbit);
+asmlinkage void poly1305_emit_arm(void *ctx, u8 mac[16], const u32 nonce[4]);
+asmlinkage void poly1305_blocks_neon(void *ctx, const u8 *inp, const size_t len,
+				     const u32 padbit);
+asmlinkage void poly1305_emit_neon(void *ctx, u8 mac[16], const u32 nonce[4]);
+
+static bool poly1305_use_neon __ro_after_init;
+
+static void __init poly1305_fpu_init(void)
+{
+#if defined(CONFIG_ZINC_ARCH_ARM64)
+	poly1305_use_neon = elf_hwcap & HWCAP_ASIMD;
+#elif defined(CONFIG_ZINC_ARCH_ARM)
+	poly1305_use_neon = elf_hwcap & HWCAP_NEON;
+#endif
+}
+
+#if defined(CONFIG_ZINC_ARCH_ARM64)
+struct poly1305_arch_internal {
+	union {
+		u32 h[5];
+		struct {
+			u64 h0, h1, h2;
+		};
+	};
+	u64 is_base2_26;
+	u64 r[2];
+};
+#elif defined(CONFIG_ZINC_ARCH_ARM)
+struct poly1305_arch_internal {
+	union {
+		u32 h[5];
+		struct {
+			u64 h0, h1;
+			u32 h2;
+		} __packed;
+	};
+	u32 r[4];
+	u32 is_base2_26;
+};
+#endif
+
+/* The NEON code uses base 2^26, while the scalar code uses base 2^64 on 64-bit
+ * and base 2^32 on 32-bit. If we hit the unfortunate situation of using NEON
+ * and then having to go back to scalar -- because the user is silly and has
+ * called the update function from two separate contexts -- then we need to
+ * convert back to the original base before proceeding. The below function is
+ * written for 64-bit integers, and so we have to swap words at the end on
+ * big-endian 32-bit. It is possible to reason that the initial reduction below
+ * is sufficient given the implementation invariants. However, for an avoidance
+ * of doubt and because this is not performance critical, we do the full
+ * reduction anyway.
+ */
+static void convert_to_base2_64(void *ctx)
+{
+	struct poly1305_arch_internal *state = ctx;
+	u32 cy;
+
+	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !state->is_base2_26)
+		return;
+
+	cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
+	cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
+	cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
+	cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
+	state->h0 = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
+	state->h1 = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
+	state->h2 = state->h[4] >> 24;
+	if (IS_ENABLED(CONFIG_ZINC_ARCH_ARM) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) {
+		state->h0 = rol64(state->h0, 32);
+		state->h1 = rol64(state->h1, 32);
+	}
+#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
+	cy = (state->h2 >> 2) + (state->h2 & ~3ULL);
+	state->h2 &= 3;
+	state->h0 += cy;
+	state->h1 += (cy = ULT(state->h0, cy));
+	state->h2 += ULT(state->h1, cy);
+#undef ULT
+	state->is_base2_26 = 0;
+}
+
+static inline bool poly1305_init_arch(void *ctx,
+				      const u8 key[POLY1305_KEY_SIZE])
+{
+	poly1305_init_arm(ctx, key);
+	return true;
+}
+
+static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
+					size_t len, const u32 padbit,
+					simd_context_t *simd_context)
+{
+	/* SIMD disables preemption, so relax after processing each page. */
+	BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
+		     PAGE_SIZE % POLY1305_BLOCK_SIZE);
+
+	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon ||
+	    !simd_use(simd_context)) {
+		convert_to_base2_64(ctx);
+		poly1305_blocks_arm(ctx, inp, len, padbit);
+		return true;
+	}
+
+	for (;;) {
+		const size_t bytes = min_t(size_t, len, PAGE_SIZE);
+
+		poly1305_blocks_neon(ctx, inp, bytes, padbit);
+		len -= bytes;
+		if (!len)
+			break;
+		inp += bytes;
+		simd_relax(simd_context);
+	}
+	return true;
+}
+
+static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
+				      const u32 nonce[4],
+				      simd_context_t *simd_context)
+{
+	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon ||
+	    !simd_use(simd_context)) {
+		convert_to_base2_64(ctx);
+		poly1305_emit_arm(ctx, mac, nonce);
+	} else
+		poly1305_emit_neon(ctx, mac, nonce);
+	return true;
+}
diff --git a/src/crypto/zinc/poly1305/poly1305-arm-glue.h b/src/crypto/zinc/poly1305/poly1305-arm-glue.h
deleted file mode 100644
index 92aa226..0000000
--- a/src/crypto/zinc/poly1305/poly1305-arm-glue.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT */
-/*
- * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
- */
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-
-asmlinkage void poly1305_init_arm(void *ctx, const u8 key[16]);
-asmlinkage void poly1305_blocks_arm(void *ctx, const u8 *inp, const size_t len,
-				    const u32 padbit);
-asmlinkage void poly1305_emit_arm(void *ctx, u8 mac[16], const u32 nonce[4]);
-asmlinkage void poly1305_blocks_neon(void *ctx, const u8 *inp, const size_t len,
-				     const u32 padbit);
-asmlinkage void poly1305_emit_neon(void *ctx, u8 mac[16], const u32 nonce[4]);
-
-static bool poly1305_use_neon __ro_after_init;
-
-static void __init poly1305_fpu_init(void)
-{
-#if defined(CONFIG_ZINC_ARCH_ARM64)
-	poly1305_use_neon = elf_hwcap & HWCAP_ASIMD;
-#elif defined(CONFIG_ZINC_ARCH_ARM)
-	poly1305_use_neon = elf_hwcap & HWCAP_NEON;
-#endif
-}
-
-#if defined(CONFIG_ZINC_ARCH_ARM64)
-struct poly1305_arch_internal {
-	union {
-		u32 h[5];
-		struct {
-			u64 h0, h1, h2;
-		};
-	};
-	u64 is_base2_26;
-	u64 r[2];
-};
-#elif defined(CONFIG_ZINC_ARCH_ARM)
-struct poly1305_arch_internal {
-	union {
-		u32 h[5];
-		struct {
-			u64 h0, h1;
-			u32 h2;
-		} __packed;
-	};
-	u32 r[4];
-	u32 is_base2_26;
-};
-#endif
-
-/* The NEON code uses base 2^26, while the scalar code uses base 2^64 on 64-bit
- * and base 2^32 on 32-bit. If we hit the unfortunate situation of using NEON
- * and then having to go back to scalar -- because the user is silly and has
- * called the update function from two separate contexts -- then we need to
- * convert back to the original base before proceeding. The below function is
- * written for 64-bit integers, and so we have to swap words at the end on
- * big-endian 32-bit. It is possible to reason that the initial reduction below
- * is sufficient given the implementation invariants. However, for an avoidance
- * of doubt and because this is not performance critical, we do the full
- * reduction anyway.
- */
-static void convert_to_base2_64(void *ctx)
-{
-	struct poly1305_arch_internal *state = ctx;
-	u32 cy;
-
-	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !state->is_base2_26)
-		return;
-
-	cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
-	cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
-	cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
-	cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
-	state->h0 = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
-	state->h1 = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
-	state->h2 = state->h[4] >> 24;
-	if (IS_ENABLED(CONFIG_ZINC_ARCH_ARM) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) {
-		state->h0 = rol64(state->h0, 32);
-		state->h1 = rol64(state->h1, 32);
-	}
-#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
-	cy = (state->h2 >> 2) + (state->h2 & ~3ULL);
-	state->h2 &= 3;
-	state->h0 += cy;
-	state->h1 += (cy = ULT(state->h0, cy));
-	state->h2 += ULT(state->h1, cy);
-#undef ULT
-	state->is_base2_26 = 0;
-}
-
-static inline bool poly1305_init_arch(void *ctx,
-				      const u8 key[POLY1305_KEY_SIZE])
-{
-	poly1305_init_arm(ctx, key);
-	return true;
-}
-
-static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
-					size_t len, const u32 padbit,
-					simd_context_t *simd_context)
-{
-	/* SIMD disables preemption, so relax after processing each page. */
-	BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
-		     PAGE_SIZE % POLY1305_BLOCK_SIZE);
-
-	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon ||
-	    !simd_use(simd_context)) {
-		convert_to_base2_64(ctx);
-		poly1305_blocks_arm(ctx, inp, len, padbit);
-		return true;
-	}
-
-	for (;;) {
-		const size_t bytes = min_t(size_t, len, PAGE_SIZE);
-
-		poly1305_blocks_neon(ctx, inp, bytes, padbit);
-		len -= bytes;
-		if (!len)
-			break;
-		inp += bytes;
-		simd_relax(simd_context);
-	}
-	return true;
-}
-
-static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
-				      const u32 nonce[4],
-				      simd_context_t *simd_context)
-{
-	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !poly1305_use_neon ||
-	    !simd_use(simd_context)) {
-		convert_to_base2_64(ctx);
-		poly1305_emit_arm(ctx, mac, nonce);
-	} else
-		poly1305_emit_neon(ctx, mac, nonce);
-	return true;
-}
diff --git a/src/crypto/zinc/poly1305/poly1305-mips-glue.c b/src/crypto/zinc/poly1305/poly1305-mips-glue.c
new file mode 100644
index 0000000..3a72d61
--- /dev/null
+++ b/src/crypto/zinc/poly1305/poly1305-mips-glue.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+asmlinkage void poly1305_init_mips(void *ctx, const u8 key[16]);
+asmlinkage void poly1305_blocks_mips(void *ctx, const u8 *inp, const size_t len,
+				     const u32 padbit);
+asmlinkage void poly1305_emit_mips(void *ctx, u8 mac[16], const u32 nonce[4]);
+
+static void __init poly1305_fpu_init(void)
+{
+}
+
+static inline bool poly1305_init_arch(void *ctx,
+				      const u8 key[POLY1305_KEY_SIZE])
+{
+	poly1305_init_mips(ctx, key);
+	return true;
+}
+
+static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
+					size_t len, const u32 padbit,
+					simd_context_t *simd_context)
+{
+	poly1305_blocks_mips(ctx, inp, len, padbit);
+	return true;
+}
+
+static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
+				      const u32 nonce[4],
+				      simd_context_t *simd_context)
+{
+	poly1305_emit_mips(ctx, mac, nonce);
+	return true;
+}
diff --git a/src/crypto/zinc/poly1305/poly1305-mips-glue.h b/src/crypto/zinc/poly1305/poly1305-mips-glue.h
deleted file mode 100644
index f71c3f6..0000000
--- a/src/crypto/zinc/poly1305/poly1305-mips-glue.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT */
-/*
- * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
- */
-
-asmlinkage void poly1305_init_mips(void *ctx, const u8 key[16]);
-asmlinkage void poly1305_blocks_mips(void *ctx, const u8 *inp, const size_t len,
-				     const u32 padbit);
-asmlinkage void poly1305_emit_mips(void *ctx, u8 mac[16], const u32 nonce[4]);
-
-static void __init poly1305_fpu_init(void)
-{
-}
-
-static inline bool poly1305_init_arch(void *ctx,
-				      const u8 key[POLY1305_KEY_SIZE])
-{
-	poly1305_init_mips(ctx, key);
-	return true;
-}
-
-static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
-					size_t len, const u32 padbit,
-					simd_context_t *simd_context)
-{
-	poly1305_blocks_mips(ctx, inp, len, padbit);
-	return true;
-}
-
-static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
-				      const u32 nonce[4],
-				      simd_context_t *simd_context)
-{
-	poly1305_emit_mips(ctx, mac, nonce);
-	return true;
-}
diff --git a/src/crypto/zinc/poly1305/poly1305-x86_64-glue.c b/src/crypto/zinc/poly1305/poly1305-x86_64-glue.c
new file mode 100644
index 0000000..3641dc8
--- /dev/null
+++ b/src/crypto/zinc/poly1305/poly1305-x86_64-glue.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/intel-family.h>
+
+asmlinkage void poly1305_init_x86_64(void *ctx,
+				     const u8 key[POLY1305_KEY_SIZE]);
+asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
+				       const size_t len, const u32 padbit);
+asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_MAC_SIZE],
+				     const u32 nonce[4]);
+asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_MAC_SIZE],
+				  const u32 nonce[4]);
+asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len,
+				    const u32 padbit);
+asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len,
+				     const u32 padbit);
+asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp,
+				       const size_t len, const u32 padbit);
+
+static bool poly1305_use_avx __ro_after_init;
+static bool poly1305_use_avx2 __ro_after_init;
+static bool poly1305_use_avx512 __ro_after_init;
+
+static void __init poly1305_fpu_init(void)
+{
+	poly1305_use_avx =
+		boot_cpu_has(X86_FEATURE_AVX) &&
+		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
+	poly1305_use_avx2 =
+		boot_cpu_has(X86_FEATURE_AVX) &&
+		boot_cpu_has(X86_FEATURE_AVX2) &&
+		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
+#ifndef COMPAT_CANNOT_USE_AVX512
+	poly1305_use_avx512 =
+		boot_cpu_has(X86_FEATURE_AVX) &&
+		boot_cpu_has(X86_FEATURE_AVX2) &&
+		boot_cpu_has(X86_FEATURE_AVX512F) &&
+		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
+				  XFEATURE_MASK_AVX512, NULL) &&
+		/* Skylake downclocks unacceptably much when using zmm. */
+		boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X;
+#endif
+}
+
+static inline bool poly1305_init_arch(void *ctx,
+				      const u8 key[POLY1305_KEY_SIZE])
+{
+	poly1305_init_x86_64(ctx, key);
+	return true;
+}
+
+struct poly1305_arch_internal {
+	union {
+		struct {
+			u32 h[5];
+			u32 is_base2_26;
+		};
+		u64 hs[3];
+	};
+	u64 r[2];
+	u64 pad;
+	struct { u32 r2, r1, r4, r3; } rn[9];
+};
+
+/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
+ * the unfortunate situation of using AVX and then having to go back to scalar
+ * -- because the user is silly and has called the update function from two
+ * separate contexts -- then we need to convert back to the original base before
+ * proceeding. It is possible to reason that the initial reduction below is
+ * sufficient given the implementation invariants. However, for an avoidance of
+ * doubt and because this is not performance critical, we do the full reduction
+ * anyway.
+ */
+static void convert_to_base2_64(void *ctx)
+{
+	struct poly1305_arch_internal *state = ctx;
+	u32 cy;
+
+	if (!state->is_base2_26)
+		return;
+
+	cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
+	cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
+	cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
+	cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
+	state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
+	state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
+	state->hs[2] = state->h[4] >> 24;
+#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
+	cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
+	state->hs[2] &= 3;
+	state->hs[0] += cy;
+	state->hs[1] += (cy = ULT(state->hs[0], cy));
+	state->hs[2] += ULT(state->hs[1], cy);
+#undef ULT
+	state->is_base2_26 = 0;
+}
+
+static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
+					size_t len, const u32 padbit,
+					simd_context_t *simd_context)
+{
+	struct poly1305_arch_internal *state = ctx;
+
+	/* SIMD disables preemption, so relax after processing each page. */
+	BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
+		     PAGE_SIZE % POLY1305_BLOCK_SIZE);
+
+	if (!IS_ENABLED(CONFIG_AS_AVX) || !poly1305_use_avx ||
+	    (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
+	    !simd_use(simd_context)) {
+		convert_to_base2_64(ctx);
+		poly1305_blocks_x86_64(ctx, inp, len, padbit);
+		return true;
+	}
+
+	for (;;) {
+		const size_t bytes = min_t(size_t, len, PAGE_SIZE);
+
+		if (IS_ENABLED(CONFIG_AS_AVX512) && poly1305_use_avx512)
+			poly1305_blocks_avx512(ctx, inp, bytes, padbit);
+		else if (IS_ENABLED(CONFIG_AS_AVX2) && poly1305_use_avx2)
+			poly1305_blocks_avx2(ctx, inp, bytes, padbit);
+		else
+			poly1305_blocks_avx(ctx, inp, bytes, padbit);
+		len -= bytes;
+		if (!len)
+			break;
+		inp += bytes;
+		simd_relax(simd_context);
+	}
+
+	return true;
+}
+
+static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
+				      const u32 nonce[4],
+				      simd_context_t *simd_context)
+{
+	struct poly1305_arch_internal *state = ctx;
+
+	if (!IS_ENABLED(CONFIG_AS_AVX) || !poly1305_use_avx ||
+	    !state->is_base2_26 || !simd_use(simd_context)) {
+		convert_to_base2_64(ctx);
+		poly1305_emit_x86_64(ctx, mac, nonce);
+	} else
+		poly1305_emit_avx(ctx, mac, nonce);
+	return true;
+}
diff --git a/src/crypto/zinc/poly1305/poly1305-x86_64-glue.h b/src/crypto/zinc/poly1305/poly1305-x86_64-glue.h
deleted file mode 100644
index 585b579..0000000
--- a/src/crypto/zinc/poly1305/poly1305-x86_64-glue.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT */
-/*
- * Copyright (C) 2015-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
- */
-
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-#include <asm/intel-family.h>
-
-asmlinkage void poly1305_init_x86_64(void *ctx,
-				     const u8 key[POLY1305_KEY_SIZE]);
-asmlinkage void poly1305_blocks_x86_64(void *ctx, const u8 *inp,
-				       const size_t len, const u32 padbit);
-asmlinkage void poly1305_emit_x86_64(void *ctx, u8 mac[POLY1305_MAC_SIZE],
-				     const u32 nonce[4]);
-asmlinkage void poly1305_emit_avx(void *ctx, u8 mac[POLY1305_MAC_SIZE],
-				  const u32 nonce[4]);
-asmlinkage void poly1305_blocks_avx(void *ctx, const u8 *inp, const size_t len,
-				    const u32 padbit);
-asmlinkage void poly1305_blocks_avx2(void *ctx, const u8 *inp, const size_t len,
-				     const u32 padbit);
-asmlinkage void poly1305_blocks_avx512(void *ctx, const u8 *inp,
-				       const size_t len, const u32 padbit);
-
-static bool poly1305_use_avx __ro_after_init;
-static bool poly1305_use_avx2 __ro_after_init;
-static bool poly1305_use_avx512 __ro_after_init;
-
-static void __init poly1305_fpu_init(void)
-{
-	poly1305_use_avx =
-		boot_cpu_has(X86_FEATURE_AVX) &&
-		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
-	poly1305_use_avx2 =
-		boot_cpu_has(X86_FEATURE_AVX) &&
-		boot_cpu_has(X86_FEATURE_AVX2) &&
-		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
-#ifndef COMPAT_CANNOT_USE_AVX512
-	poly1305_use_avx512 =
-		boot_cpu_has(X86_FEATURE_AVX) &&
-		boot_cpu_has(X86_FEATURE_AVX2) &&
-		boot_cpu_has(X86_FEATURE_AVX512F) &&
-		cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
-				  XFEATURE_MASK_AVX512, NULL) &&
-		/* Skylake downclocks unacceptably much when using zmm. */
-		boot_cpu_data.x86_model != INTEL_FAM6_SKYLAKE_X;
-#endif
-}
-
-static inline bool poly1305_init_arch(void *ctx,
-				      const u8 key[POLY1305_KEY_SIZE])
-{
-	poly1305_init_x86_64(ctx, key);
-	return true;
-}
-
-struct poly1305_arch_internal {
-	union {
-		struct {
-			u32 h[5];
-			u32 is_base2_26;
-		};
-		u64 hs[3];
-	};
-	u64 r[2];
-	u64 pad;
-	struct { u32 r2, r1, r4, r3; } rn[9];
-};
-
-/* The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit
- * the unfortunate situation of using AVX and then having to go back to scalar
- * -- because the user is silly and has called the update function from two
- * separate contexts -- then we need to convert back to the original base before
- * proceeding. It is possible to reason that the initial reduction below is
- * sufficient given the implementation invariants. However, for an avoidance of
- * doubt and because this is not performance critical, we do the full reduction
- * anyway.
- */
-static void convert_to_base2_64(void *ctx)
-{
-	struct poly1305_arch_internal *state = ctx;
-	u32 cy;
-
-	if (!state->is_base2_26)
-		return;
-
-	cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
-	cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
-	cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
-	cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
-	state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
-	state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
-	state->hs[2] = state->h[4] >> 24;
-#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
-	cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL);
-	state->hs[2] &= 3;
-	state->hs[0] += cy;
-	state->hs[1] += (cy = ULT(state->hs[0], cy));
-	state->hs[2] += ULT(state->hs[1], cy);
-#undef ULT
-	state->is_base2_26 = 0;
-}
-
-static inline bool poly1305_blocks_arch(void *ctx, const u8 *inp,
-					size_t len, const u32 padbit,
-					simd_context_t *simd_context)
-{
-	struct poly1305_arch_internal *state = ctx;
-
-	/* SIMD disables preemption, so relax after processing each page. */
-	BUILD_BUG_ON(PAGE_SIZE < POLY1305_BLOCK_SIZE ||
-		     PAGE_SIZE % POLY1305_BLOCK_SIZE);
-
-	if (!IS_ENABLED(CONFIG_AS_AVX) || !poly1305_use_avx ||
-	    (len < (POLY1305_BLOCK_SIZE * 18) && !state->is_base2_26) ||
-	    !simd_use(simd_context)) {
-		convert_to_base2_64(ctx);
-		poly1305_blocks_x86_64(ctx, inp, len, padbit);
-		return true;
-	}
-
-	for (;;) {
-		const size_t bytes = min_t(size_t, len, PAGE_SIZE);
-
-		if (IS_ENABLED(CONFIG_AS_AVX512) && poly1305_use_avx512)
-			poly1305_blocks_avx512(ctx, inp, bytes, padbit);
-		else if (IS_ENABLED(CONFIG_AS_AVX2) && poly1305_use_avx2)
-			poly1305_blocks_avx2(ctx, inp, bytes, padbit);
-		else
-			poly1305_blocks_avx(ctx, inp, bytes, padbit);
-		len -= bytes;
-		if (!len)
-			break;
-		inp += bytes;
-		simd_relax(simd_context);
-	}
-
-	return true;
-}
-
-static inline bool poly1305_emit_arch(void *ctx, u8 mac[POLY1305_MAC_SIZE],
-				      const u32 nonce[4],
-				      simd_context_t *simd_context)
-{
-	struct poly1305_arch_internal *state = ctx;
-
-	if (!IS_ENABLED(CONFIG_AS_AVX) || !poly1305_use_avx ||
-	    !state->is_base2_26 || !simd_use(simd_context)) {
-		convert_to_base2_64(ctx);
-		poly1305_emit_x86_64(ctx, mac, nonce);
-	} else
-		poly1305_emit_avx(ctx, mac, nonce);
-	return true;
-}
diff --git a/src/crypto/zinc/poly1305/poly1305.c b/src/crypto/zinc/poly1305/poly1305.c
index 4ab8116..88f94cd 100644
--- a/src/crypto/zinc/poly1305/poly1305.c
+++ b/src/crypto/zinc/poly1305/poly1305.c
@@ -16,11 +16,11 @@
 #include <linux/init.h>
 
 #if defined(CONFIG_ZINC_ARCH_X86_64)
-#include "poly1305-x86_64-glue.h"
+#include "poly1305-x86_64-glue.c"
 #elif defined(CONFIG_ZINC_ARCH_ARM) || defined(CONFIG_ZINC_ARCH_ARM64)
-#include "poly1305-arm-glue.h"
+#include "poly1305-arm-glue.c"
 #elif defined(CONFIG_ZINC_ARCH_MIPS) || defined(CONFIG_ZINC_ARCH_MIPS64)
-#include "poly1305-mips-glue.h"
+#include "poly1305-mips-glue.c"
 #else
 static inline bool poly1305_init_arch(void *ctx,
 				      const u8 key[POLY1305_KEY_SIZE])
@@ -134,7 +134,7 @@ void poly1305_final(struct poly1305_ctx *ctx, u8 mac[POLY1305_MAC_SIZE],
 }
 EXPORT_SYMBOL(poly1305_final);
 
-#include "../selftest/poly1305.h"
+#include "../selftest/poly1305.c"
 
 static bool nosimd __initdata = false;
 
-- 
cgit v1.2.3-59-g8ed1b