diff options
Diffstat (limited to 'main.c')
-rw-r--r-- | main.c | 112 |
1 files changed, 101 insertions, 11 deletions
@@ -6,37 +6,127 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/delay.h>
-#include "function.h"
+#include <linux/slab.h>
+#include <linux/sort.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/fpu/api.h>
+#include <asm/simd.h>
 
 static unsigned long stamp = 0;
 module_param(stamp, ulong, 0);
 int dummy;
 
+/* Blocks passed to the compression routine per call; input holds 64 bytes for each. */
+enum { BLOCKS_PER_CALL = 16 };
+
+static u8 state[128];
+static u8 input[64 * BLOCKS_PER_CALL];
+
+/*
+ * declare_it(name) - declare the external blake2s_compress_<name>() routine
+ * and define a wrapper name() that calls it once on the shared state and
+ * input buffers with BLOCKS_PER_CALL blocks and inc = 0. The wrapper returns
+ * input[0] so the caller has a value to fold into its result.
+ */
+#define declare_it(name) \
+asmlinkage void blake2s_compress_ ## name(u8 state[128], const u8 *block, const size_t nblocks, const u32 inc); \
+static __always_inline u8 name(void) \
+{ \
+	blake2s_compress_ ## name(state, input, BLOCKS_PER_CALL, 0); \
+	return input[0]; \
+}
+
+/*
+ * do_it(name) - benchmark name() and leave the median cycle count of one
+ * call in median_<name>. Must expand in a scope that provides i, ret,
+ * trial_times (TRIALS + 1 slots), WARMUP, TRIALS and median_<name>.
+ *
+ * WARMUP untimed calls run first, then a cpuid instruction (presumably as
+ * a serializing barrier before timing - confirm). TRIALS + 1 timestamps are
+ * taken, one before each call, and adjacent pairs are reduced in place to
+ * TRIALS deltas. Only those deltas are sorted: slot TRIALS still holds a
+ * raw timestamp, not a duration.
+ */
+#define do_it(name) do { \
+	u32 eax = 0, ebx = 0, ecx = 0, edx = 0; \
+	for (i = 0; i < WARMUP; ++i) \
+		ret |= name(); \
+	asm volatile("cpuid" : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx)); \
+	for (i = 0; i <= TRIALS; ++i) { \
+		trial_times[i] = get_cycles(); \
+		ret |= name(); \
+	} \
+	for (i = 0; i < TRIALS; ++i) \
+		trial_times[i] = trial_times[i + 1] - trial_times[i]; \
+	sort(trial_times, TRIALS, sizeof(cycles_t), compare_cycles, NULL); \
+	median_ ## name = trial_times[TRIALS / 2]; \
+} while (0)
+
+/*
+ * report_it(name) - log median_<name> divided by BLOCKS_PER_CALL as
+ * "cycles per block", prefixed with the stamp module parameter.
+ */
+#define report_it(name) do { \
+	pr_err("%lu: %12s: %6llu cycles per block\n", stamp, #name, median_ ## name / BLOCKS_PER_CALL); \
+} while (0)
+
+
+declare_it(avx)
+declare_it(avx512_ymm)
+declare_it(avx512_zmm)
+
+/*
+ * sort() comparator ordering cycles_t values ascending.
+ *
+ * Returns -1, 0 or 1 from explicit comparisons. Returning the subtraction
+ * directly would convert a cycles_t difference to int, which can yield the
+ * wrong sign or a false 0 when the difference does not fit.
+ */
+static int compare_cycles(const void *a, const void *b)
+{
+	const cycles_t x = *(const cycles_t *)a;
+	const cycles_t y = *(const cycles_t *)b;
+
+	return (x > y) - (x < y);
+}
+
 static int __init mod_init(void)
 {
+	enum { WARMUP = 6000, TRIALS = 5000, IDLE = 1 * 1000 };
 	int ret = 0, i;
-	cycles_t start, end;
+	cycles_t *trial_times;
+	cycles_t median_avx = 0;
+	cycles_t median_avx512_ymm = 0;
+	cycles_t median_avx512_zmm = 0;
 	unsigned long flags;
 	DEFINE_SPINLOCK(lock);
-
+
+	trial_times = kcalloc(TRIALS + 1, sizeof(cycles_t), GFP_KERNEL);
+	if (!trial_times)
+		return -ENOMEM;
+
 	msleep(IDLE);
spin_lock_irqsave(&lock, flags); - - for (i = 0; i < WARMUP; ++i) - ret |= function(); - start = get_cycles(); - for (i = 0; i < TRIALS; ++i) - ret |= function(); - end = get_cycles(); + kernel_fpu_begin(); + + do_it(avx); + do_it(avx512_ymm); + do_it(avx512_zmm); + + kernel_fpu_end(); spin_unlock_irqrestore(&lock, flags); - pr_err("%lu: %llu cycles per call\n", stamp, (end - start) / TRIALS); + report_it(avx); + report_it(avx512_ymm); + report_it(avx512_zmm); /* Don't let compiler be too clever. */ dummy = ret; + kfree(trial_times); /* We should never actually agree to insert the module. Choosing * -0x1000 here is an amazing hack. It causes the kernel to not |