| author | Shawn Landden <shawn@git.icu> | 2019-05-11 14:19:51 -0300 |
|---|---|---|
| committer | Shawn Landden <shawn@git.icu> | 2019-05-13 16:25:28 -0500 |
| commit | cc6513fd7d0e049c0c0dae0c6ef62eb6ad11afe1 (patch) | |
| tree | 8ea0fe17d01d9287d090a05305f1210cf64a555d /src/crypto/zinc/poly1305/poly1305-arm-glue.c | |
| parent | [Zinc] Add PowerPC chacha20 implementation from openssl/cryptograms (diff) | |
| download | wireguard-monolithic-historical-sl/ppc.tar.xz, wireguard-monolithic-historical-sl/ppc.zip | |
[Zinc] Add PowerPC accelerated poly1305 from openssl/cryptograms (sl/ppc)
Unfortunately I am not seeing a speed-up with this patch,
but it does decrease CPU usage.
It currently runs only on the outbound path, as the inbound path runs in
interrupt context (where the kernel's vector unit is not usable), but that
can be fixed in Linux; a sketch of this gating follows after the sign-off.
v2: - Do not include the FPU version, as a +10% gain on POWER8
      (admittedly better on really old CPUs, like Old World Macs) is not
      worth it, especially when a fast VSX version is available.
    - Honor CONFIG_VSX.
Signed-off-by: Shawn Landden <shawn@git.icu>
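
To make the interrupt-context point and the CONFIG_VSX note concrete, here is
a minimal sketch of how a PowerPC glue routine is typically gated. It is not
taken from this patch and is not Zinc's actual glue API: poly1305_blocks_ppc(),
poly1305_blocks_generic() and poly1305_blocks_vsx() are hypothetical names,
while may_use_simd(), enable_kernel_vsx() and disable_kernel_vsx() are existing
kernel interfaces.

```c
/*
 * Illustrative sketch only -- not part of this patch. It shows the usual
 * Linux pattern for gating a vectorized Poly1305 path: a compile-time gate
 * on CONFIG_VSX plus a run-time fallback to the portable C code whenever
 * SIMD is unusable (e.g. the interrupt-driven inbound path noted above).
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <asm/simd.h>       /* may_use_simd() */
#include <asm/switch_to.h>  /* enable_kernel_vsx(), disable_kernel_vsx() */

/* Hypothetical helpers standing in for the real scalar and VSX routines. */
void poly1305_blocks_generic(void *ctx, const u8 *inp, size_t len, u32 padbit);
void poly1305_blocks_vsx(void *ctx, const u8 *inp, size_t len, u32 padbit);

static void poly1305_blocks_ppc(void *ctx, const u8 *inp, size_t len,
				u32 padbit)
{
	/* No VSX, or running in interrupt context: use the scalar code. */
	if (!IS_ENABLED(CONFIG_VSX) || !may_use_simd()) {
		poly1305_blocks_generic(ctx, inp, len, padbit);
		return;
	}

	/* Vector registers must be explicitly enabled around their use. */
	enable_kernel_vsx();
	poly1305_blocks_vsx(ctx, inp, len, padbit);
	disable_kernel_vsx();
}
```

Because may_use_simd() returns false in interrupt context, a routine gated this
way falls back to the scalar code on the inbound path, which is consistent with
only the outbound path benefiting today.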
Diffstat (limited to 'src/crypto/zinc/poly1305/poly1305-arm-glue.c')
-rw-r--r-- | src/crypto/zinc/poly1305/poly1305-arm-glue.c | 65 |
1 file changed, 0 insertions, 65 deletions
diff --git a/src/crypto/zinc/poly1305/poly1305-arm-glue.c b/src/crypto/zinc/poly1305/poly1305-arm-glue.c
index a80f046..6100700 100644
--- a/src/crypto/zinc/poly1305/poly1305-arm-glue.c
+++ b/src/crypto/zinc/poly1305/poly1305-arm-glue.c
@@ -26,71 +26,6 @@ static void __init poly1305_fpu_init(void)
 #endif
 }
 
-#if defined(CONFIG_ZINC_ARCH_ARM64)
-struct poly1305_arch_internal {
-	union {
-		u32 h[5];
-		struct {
-			u64 h0, h1, h2;
-		};
-	};
-	u64 is_base2_26;
-	u64 r[2];
-};
-#elif defined(CONFIG_ZINC_ARCH_ARM)
-struct poly1305_arch_internal {
-	union {
-		u32 h[5];
-		struct {
-			u64 h0, h1;
-			u32 h2;
-		} __packed;
-	};
-	u32 r[4];
-	u32 is_base2_26;
-};
-#endif
-
-/* The NEON code uses base 2^26, while the scalar code uses base 2^64 on 64-bit
- * and base 2^32 on 32-bit. If we hit the unfortunate situation of using NEON
- * and then having to go back to scalar -- because the user is silly and has
- * called the update function from two separate contexts -- then we need to
- * convert back to the original base before proceeding. The below function is
- * written for 64-bit integers, and so we have to swap words at the end on
- * big-endian 32-bit. It is possible to reason that the initial reduction below
- * is sufficient given the implementation invariants. However, for an avoidance
- * of doubt and because this is not performance critical, we do the full
- * reduction anyway.
- */
-static void convert_to_base2_64(void *ctx)
-{
-	struct poly1305_arch_internal *state = ctx;
-	u32 cy;
-
-	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !state->is_base2_26)
-		return;
-
-	cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy;
-	cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy;
-	cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy;
-	cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy;
-	state->h0 = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0];
-	state->h1 = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12);
-	state->h2 = state->h[4] >> 24;
-	if (IS_ENABLED(CONFIG_ZINC_ARCH_ARM) && IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) {
-		state->h0 = rol64(state->h0, 32);
-		state->h1 = rol64(state->h1, 32);
-	}
-#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
-	cy = (state->h2 >> 2) + (state->h2 & ~3ULL);
-	state->h2 &= 3;
-	state->h0 += cy;
-	state->h1 += (cy = ULT(state->h0, cy));
-	state->h2 += ULT(state->h1, cy);
-#undef ULT
-	state->is_base2_26 = 0;
-}
-
 static inline bool poly1305_init_arch(void *ctx,
 				      const u8 key[POLY1305_KEY_SIZE])
 {
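
As a reading aid (not part of the patch itself): the convert_to_base2_64()
function removed above repacks the NEON accumulator, held as five base-2^26
limbs h[0..4], into the 64-bit limbs h0, h1, h2 used by the scalar code. After
the initial carry chain, the repacking performed by the three assignments in
the code is, written out:

```latex
\begin{aligned}
h  &= h[0] + 2^{26}\,h[1] + 2^{52}\,h[2] + 2^{78}\,h[3] + 2^{104}\,h[4],\\
h_0 &= h[0] \mid (h[1] \ll 26) \mid (h[2] \ll 52) && \text{(bits 0--63)},\\
h_1 &= (h[2] \gg 12) \mid (h[3] \ll 14) \mid (h[4] \ll 40) && \text{(bits 64--127)},\\
h_2 &= h[4] \gg 24 && \text{(bits 128 and up)}.
\end{aligned}
```

The tail of the function then applies the usual partial reduction modulo
2^130 - 5: since 2^130 = 5 (mod 2^130 - 5), the bits of h2 above the low two
are folded back in as cy = (h2 >> 2) + (h2 & ~3) = 5 * (h2 >> 2), and the
ULT() macro is a branchless unsigned less-than used to propagate the resulting
carries without data-dependent branches.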