aboutsummaryrefslogtreecommitdiffstats
path: root/lib/zinc/chacha20/chacha20.c
diff options
context:
space:
mode:
author Jason A. Donenfeld <Jason@zx2c4.com> 2018-10-02 02:46:06 +0200
committer Jason A. Donenfeld <Jason@zx2c4.com> 2019-03-22 00:50:53 -0600
commit d6bb7c42419116bee371487334f7e35dc8cabfc7 (patch)
tree 5fe1db4a20ade7d51ec5c1ad0757074b9d8da1fe /lib/zinc/chacha20/chacha20.c
parent zinc: ChaCha20 generic C implementation and selftest (diff)
download linux-dev-d6bb7c42419116bee371487334f7e35dc8cabfc7.tar.xz
linux-dev-d6bb7c42419116bee371487334f7e35dc8cabfc7.zip
zinc: ChaCha20 x86_64 implementation
These x86_64 vectorized implementations are based on Andy Polyakov's
implementations, and support SSSE3, AVX-2, AVX-512F, and AVX-512VL. The
AVX-512F implementation is disabled on Skylake, due to throttling, and
the VL ymm implementation is used instead on that platform; other
AVX-512 microarchitectures use AVX-512F.

On the left are cycle counts on a Core i7 6700HQ using the AVX-2
codepath, comparing this implementation ("new") to the implementation in
the current crypto api ("old"). On the right are benchmarks on a Xeon
Gold 5120 using the AVX-512 codepath. The difference is so stark because
the current crypto api's implementation does not support AVX-512 at all.

        AVX-2                    AVX-512
      ---------                -----------

    size   old    new        size   old    new
    ----   ----   ----       ----   ----   ----
    0      62     52         0      64     54
    16     414    376        16     386    372
    32     410    400        32     388    396
    48     414    422        48     388    420
    64     362    356        64     366    350
    80     714    666        80     708    666
    96     714    700        96     708    692
    112    712    718        112    706    736
    128    692    646        128    692    648
    144    1042   674        144    1036   682
    160    1042   694        160    1036   708
    176    1042   726        176    1036   730
    192    1018   650        192    1016   658
    208    1366   686        208    1360   684
    224    1366   696        224    1362   708
    240    1366   722        240    1360   732
    256    640    656        256    644    500
    272    988    1246       272    990    526
    288    988    1276       288    988    556
    304    992    1296       304    988    576
    320    972    1222       320    972    500
    336    1318   1256       336    1314   532
    352    1318   1276       352    1316   558
    368    1316   1294       368    1318   578
    384    1294   1218       384    1308   506
    400    1642   1258       400    1644   532
    416    1642   1282       416    1644   556
    432    1642   1302       432    1644   594
    448    1628   1224       448    1624   508
    464    1970   1258       464    1970   534
    480    1970   1280       480    1970   556
    496    1970   1300       496    1968   582
    512    656    676        512    660    624
    528    1010   1290       528    1016   682
    544    1010   1306       544    1016   702
    560    1010   1332       560    1018   728
    576    986    1254       576    998    654
    592    1340   1284       592    1344   680
    608    1334   1310       608    1344   708
    624    1340   1334       624    1344   730
    640    1314   1254       640    1326   654
    656    1664   1282       656    1670   686
    672    1674   1306       672    1670   708
    688    1662   1336       688    1670   732
    704    1638   1250       704    1652   658
    720    1992   1292       720    1998   682
    736    1994   1308       736    1998   710
    752    1988   1334       752    1996   734
    768    1252   1254       768    1256   662
    784    1596   1290       784    1606   688
    800    1596   1314       800    1606   714
    816    1596   1330       816    1606   736
    832    1576   1256       832    1584   660
    848    1922   1286       848    1948   688
    864    1922   1314       864    1950   714
    880    1926   1338       880    1948   736
    896    1898   1258       896    1912   688
    912    2248   1288       912    2258   718
    928    2248   1320       928    2258   744
    944    2248   1338       944    2256   768
    960    2226   1268       960    2238   692
    976    2574   1288       976    2584   718
    992    2576   1312       992    2584   744
    1008   2574   1340       1008   2584   770

While this is CRYPTOGAMS code, the originating code for this happens to
be derived from OpenSSL's commit cded951378069a478391843f5f8653c1eb5128da.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Samuel Neves <sneves@dei.uc.pt>
Co-developed-by: Samuel Neves <sneves@dei.uc.pt>
Co-developed-by: Andy Polyakov <appro@openssl.org>
Cc: Andy Polyakov <appro@openssl.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: x86@kernel.org
Cc: Samuel Neves <sneves@dei.uc.pt>
Cc: Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: kernel-hardening@lists.openwall.com
Cc: linux-crypto@vger.kernel.org
Diffstat (limited to 'lib/zinc/chacha20/chacha20.c')
-rw-r--r-- lib/zinc/chacha20/chacha20.c 4
1 files changed, 4 insertions, 0 deletions
diff --git a/lib/zinc/chacha20/chacha20.c b/lib/zinc/chacha20/chacha20.c
index 6d3f62f0ef26..b47aefd65e80 100644
--- a/lib/zinc/chacha20/chacha20.c
+++ b/lib/zinc/chacha20/chacha20.c
@@ -16,6 +16,9 @@
#include <linux/vmalloc.h>
#include <crypto/algapi.h> // For crypto_xor_cpy.
+#if defined(CONFIG_ZINC_ARCH_X86_64)
+#include "chacha20-x86_64-glue.c"
+#else
static bool *const chacha20_nobs[] __initconst = { };
static void __init chacha20_fpu_init(void)
{
@@ -33,6 +36,7 @@ static inline bool hchacha20_arch(u32 derived_key[CHACHA20_KEY_WORDS],
{
return false;
}
+#endif
#define QUARTER_ROUND(x, a, b, c, d) ( \
x[a] += x[b], \