diff options
-rw-r--r-- | Makefile | 20 | ||||
-rw-r--r-- | README.md | 26 | ||||
-rw-r--r-- | hsiphash.c | 20 | ||||
-rw-r--r-- | lfsr.c | 18 | ||||
-rw-r--r-- | lfsr2.c | 24 | ||||
-rw-r--r-- | main.c | 95 | ||||
-rw-r--r-- | quarcha.c | 36 | ||||
-rw-r--r-- | rotxor32.c | 9 | ||||
-rw-r--r-- | rotxor64.c | 10 | ||||
-rwxr-xr-x | run.sh | 35 | ||||
-rw-r--r-- | siphash.c | 22 | ||||
-rw-r--r-- | sparkle.c | 60 | ||||
-rw-r--r-- | speck.c | 29 | ||||
-rw-r--r-- | spelvin.c | 26 |
14 files changed, 430 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..3677112
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,22 @@
+ifneq ($(KERNELRELEASE),)
+# Invoked by Kbuild: link every .c file here (except generated *.mod.c) into one module.
+kbench9000-y := $(sort $(patsubst %.c,%.o,$(filter-out %.mod.c,$(subst $(M)/,,$(wildcard $(M)/*.c)))))
+obj-m := kbench9000.o
+ccflags-y += -O3
+ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
+else
+# Invoked directly: hand this directory to the kernel build system.
+KERNELDIR ?= /lib/modules/$(shell uname -r)/build
+PWD := $(shell pwd)
+
+default: build
+
+run: build
+	sudo ./run.sh
+build:
+	$(MAKE) -C $(KERNELDIR) M=$(PWD)
+clean:
+	$(MAKE) -C $(KERNELDIR) M=$(PWD) clean
+.PHONY: default run build clean
+endif
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3fd3180
--- /dev/null
+++ b/README.md
@@ -0,0 +1,26 @@
+# kBench9000 – simple kernel land cycle counter
+### by [Jason A. Donenfeld](mailto:jason@zx2c4.com)
+
+This is a very simple kernel land cycle counter. To use, add a `.c` file that
+defines `mix_<name>(u32 h[4], const u32 v[4])` (the `Makefile` builds every
+`.c` file automatically), add `f(<name>);` to `forall` in `main.c`, and then type:
+
+```
+$ make run
+```
+
+![Expected kBench9000 output](https://data.zx2c4.com/kbench9000-screenshot.png)
+
+### Kernel Toolchain
+
+You'll need to have a working kernel toolchain, usually achievable by:
+
+```
+$ sudo apt install linux-headers-$(uname -r) build-essential
+```
+
+or
+
+```
+$ sudo dnf install kernel-devel @development-tools
+```
diff --git a/hsiphash.c b/hsiphash.c
new file mode 100644
index 0000000..7c2263c
--- /dev/null
+++ b/hsiphash.c
@@ -0,0 +1,25 @@
+#include <linux/kernel.h>
+
+/* One add-rotate-xor round over the four 32-bit state words in s. */
+#define HSIPROUND \
+	do { \
+	s[0] += s[1]; s[1] = rol32(s[1], 5); s[1] ^= s[0]; s[0] = rol32(s[0], 16); \
+	s[2] += s[3]; s[3] = rol32(s[3], 8); s[3] ^= s[2]; \
+	s[0] += s[3]; s[3] = rol32(s[3], 7); s[3] ^= s[0]; \
+	s[2] += s[1]; s[1] = rol32(s[1], 13); s[1] ^= s[2]; s[2] = rol32(s[2], 16); \
+	} while (0)
+
+/*
+ * Absorb the four words of v into the state s: each word is XORed into
+ * s[3], one HSIPROUND is applied, then the same word is XORed into s[0].
+ */
+void mix_hsiphash(u32 s[4], const u32 v[4])
+{
+	size_t i;
+
+	for (i = 0; i < 4; ++i) {
+		s[3] ^= v[i];
+		HSIPROUND;
+		s[0] ^= v[i];
+	}
+}
@@
-0,0 +1,23 @@
+#include <linux/kernel.h>
+
+/*
+ * For each input word: combine it with h[0], h[1] and h[3], scramble the
+ * result with three xorshifts, then shift the state down one word and
+ * append the new word as h[3].
+ */
+void mix_lfsr(u32 h[4], const u32 v[4])
+{
+	u32 w;
+	int i;
+
+	for (i = 0; i < 4; ++i) {
+		w = h[0] ^ h[1] ^ h[3] ^ v[i];
+		w ^= w << 17;
+		w ^= w >> 6;
+		w ^= w >> 9;
+		h[0] = h[1];
+		h[1] = h[2];
+		h[2] = h[3];
+		h[3] = w;
+	}
+}
@@ -0,0 +1,30 @@
+#include <linux/kernel.h>
+
+/*
+ * For each input word: XOR it into h[0], derive a new word from h[0], h[2]
+ * and h[3] using the rotate-xor diffusion R(), then shift the state down
+ * one word and append the new word as h[3].
+ */
+void mix_lfsr2(u32 h[4], const u32 v[4])
+{
+	size_t i;
+	u32 w;
+
+	/* R(a): three chained "x ^= rol32(x, r1) ^ rol32(x, r2)" diffusion steps. */
+#define R(a) ({ \
+	u32 x = (a); \
+	x ^= rol32(x, 1) ^ rol32(x, 2); \
+	x ^= rol32(x, 7) ^ rol32(x, 14); \
+	x ^= rol32(x, 12) ^ rol32(x, 24); \
+	x; \
+})
+	for (i = 0; i < 4; ++i) {
+		h[0] ^= v[i];
+		w = R(h[0]) ^ (h[0] << 1) ^ (h[2] << 2) ^ R(h[3]);
+		h[0] = h[1];
+		h[1] = h[2];
+		h[2] = h[3];
+		h[3] = w;
+	}
+#undef R
+}
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2018-2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/random.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/fpu/api.h>
+#include <asm/simd.h>
+
+static unsigned long stamp = 0;
+module_param(stamp, ulong, 0);
+
+/* Prototype for a mixer; each mix_<name>() lives in its own .c file and returns void. */
+#define declare_it(name) void mix_ ##name(u32 h[4], const u32 v[4])
+
+/* Per-mixer result slot, filled in by do_it() and printed by report_it(). */
+#define local_it(name) cycles_t median_ ##name = 0
+
+/*
+ * Time mix_<name>(): WARMUP untimed calls, a cpuid, then TRIALS + 1
+ * timestamps, each followed by one call. Adjacent timestamps become TRIALS
+ * per-call deltas, which are sorted; the middle one goes to median_<name>.
+ * Uses i, pool, input and trial_times from the enclosing mod_init().
+ */
+#define do_it(name) do { \
+	u32 eax = 0, ebx = 0, ecx = 0, edx = 0; \
+	for (i = 0; i < WARMUP; ++i) \
+		mix_ ##name(pool, input); \
+	asm volatile("cpuid" : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx)); \
+	for (i = 0; i <= TRIALS; ++i) { \
+		trial_times[i] = get_cycles(); \
+		mix_ ##name(pool, input); \
+	} \
+	for (i = 0; i < TRIALS; ++i) \
+		trial_times[i] = trial_times[i + 1] - trial_times[i]; \
+	/* Only the first TRIALS slots hold deltas; the last is a raw timestamp. */ \
+	sort(trial_times, TRIALS, sizeof(cycles_t), compare_cycles, NULL); \
+	median_ ## name = trial_times[TRIALS / 2]; \
+} while (0)
+
+/* Print the median for mix_<name>(), prefixed with stamp so run.sh can find it. */
+#define report_it(name) do { \
+	pr_err("%lu: %12s: %6llu cycles per call\n", stamp, #name, median_ ## name); \
+} while (0)
+
+/* Apply f to every benchmarked mixer; add f(<name>); here for a new one. */
+#define forall(f) \
+	f(spelvin); \
+	f(rotxor32); \
+	f(rotxor64); \
+	f(speck); \
+	f(quarcha); \
+	f(sparkle); \
+	f(siphash); \
+	f(hsiphash); \
+	f(lfsr); \
+	f(lfsr2);
+
+forall(declare_it)
+
+/*
+ * sort() comparator for cycles_t. The operands are compared instead of
+ * subtracted: a cycles_t difference squeezed into the int return value can
+ * lose high bits and come out with the wrong sign.
+ */
+static int compare_cycles(const void *a, const void *b)
+{
+	const cycles_t x = *(const cycles_t *)a, y = *(const cycles_t *)b;
+
+	return (x > y) - (x < y);
+}
+
+/*
+ * Benchmark every mixer under a spinlock with interrupts disabled, print one
+ * result line per mixer, then return an error so the module never stays
+ * loaded.
+ */
+static int __init mod_init(void)
+{
+	enum { WARMUP = 6000, TRIALS = 100000, IDLE = 1 * 1000 };
+	int i;
+	cycles_t *trial_times;
+	forall(local_it)
+	unsigned long flags;
+	DEFINE_SPINLOCK(lock);
+	u32 pool[4] = { 0 }, input[4];
+
+	get_random_bytes(input, sizeof(input));
+
+	trial_times = kcalloc(TRIALS + 1, sizeof(cycles_t), GFP_KERNEL);
+	if (!trial_times)
+		return -ENOMEM;
+
+	msleep(IDLE);
+	spin_lock_irqsave(&lock, flags);
+	forall(do_it)
+	spin_unlock_irqrestore(&lock, flags);
+	forall(report_it)
+	kfree(trial_times);
+
+	/* We should never actually agree to insert the module. Choosing
+	 * -0x1000 here is an amazing hack. It causes the kernel to not
+	 * actually load the module, while the standard userspace tools
+	 * don't return an error, because it's too big. */
+	return -0x1000;
+}
+
+module_init(mod_init);
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("kBench9000 Cycle Counter");
+MODULE_AUTHOR("Jason A.
Donenfeld <Jason@zx2c4.com>");
diff --git a/quarcha.c b/quarcha.c
new file mode 100644
index 0000000..5643a97
--- /dev/null
+++ b/quarcha.c
@@ -0,0 +1,39 @@
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+/* One eight-operation ARX step on (a, b, c, d); reads key[] from the scope it is expanded in. */
+#define R(r, a, b, c, d) ( \
+	a += b + (r ^ key[0]), \
+	d = rol32(d ^ a, 16), \
+	c += d + (r ^ key[1]), \
+	b = rol32(b ^ c, 12), \
+	a += b + (r ^ key[2]), \
+	d = rol32(d ^ a, 8), \
+	c += d + (r ^ key[3]), \
+	b = rol32(b ^ c, 7))
+
+static const u32 rc[4] = {
+	/* expand 32-byte k */
+	0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U
+};
+
+/* Transform block in place: four R() steps, each with its own constant and word order. */
+static inline void blockcipher(u32 block[4], const u32 key[4])
+{
+	R(rc[0], block[0], block[1], block[2], block[3]);
+	R(rc[1], block[3], block[2], block[1], block[0]);
+	R(rc[2], block[2], block[3], block[0], block[1]);
+	R(rc[3], block[1], block[0], block[3], block[2]);
+}
+
+/* Run blockcipher() on h keyed by m, then XOR the previous value of h back in. */
+void mix_quarcha(u32 h[4], const u32 m[4])
+{
+	u32 n[4];
+	memcpy(n, h, sizeof(n));
+	blockcipher(h, m);
+	h[0] ^= n[0];
+	h[1] ^= n[1];
+	h[2] ^= n[2];
+	h[3] ^= n[3];
+}
diff --git a/rotxor32.c b/rotxor32.c
new file mode 100644
index 0000000..5e37599
--- /dev/null
+++ b/rotxor32.c
@@ -0,0 +1,10 @@
+#include <linux/kernel.h>
+
+/* Rotate each 32-bit state word right by 7 and XOR in the matching input word. */
+void mix_rotxor32(u32 h[4], const u32 v[4])
+{
+	h[0] = ror32(h[0], 7) ^ v[0];
+	h[1] = ror32(h[1], 7) ^ v[1];
+	h[2] = ror32(h[2], 7) ^ v[2];
+	h[3] = ror32(h[3], 7) ^ v[3];
+}
diff --git a/rotxor64.c b/rotxor64.c
new file mode 100644
index 0000000..21acacf
--- /dev/null
+++ b/rotxor64.c
@@ -0,0 +1,11 @@
+#include <linux/kernel.h>
+
+/* View h and v as two 64-bit words each; rotate each state word right by 19 and XOR in the input. */
+void mix_rotxor64(u32 h[4], const u32 v[4])
+{
+	u64 *h64 = (u64 *)h;
+	const u64 *v64 = (const u64 *)v;
+
+	h64[0] = ror64(h64[0], 19) ^ v64[0];
+	h64[1] = ror64(h64[1], 19) ^ v64[1];
+}
@@ -0,0 +1,37 @@
+#!/bin/bash
+set -e
+
+# Write $1 to every per-CPU "online" file under /sys/devices/system/cpu.
+nob_cpus() {
+	echo "[+] Setting non-boot CPUs to status $1"
+	for i in /sys/devices/system/cpu/*/online; do
+		echo "$1" > "$i"
+	done
+}
+
+# Write $1 to intel_pstate's no_turbo file if present, otherwise set MSR 0x1a0 via wrmsr.
+noturbo() {
+	echo "[+] Setting no-turbo to status $1"
+	if [[ -e /sys/devices/system/cpu/intel_pstate/no_turbo ]]; then
+		echo "$1" > /sys/devices/system/cpu/intel_pstate/no_turbo
+	else
+		local val
+		[[ $1 == 0 ]] && val=0x850089
+		[[ $1 == 1 ]] && val=0x4000850089
+		[[ -n $val ]] || return 0
+		wrmsr -a 0x1a0 $val
+	fi
+}
+
+[[ -e kbench9000.ko ]]
+
+trap "nob_cpus 1; noturbo 0;" INT TERM EXIT
+noturbo 1
+nob_cpus 0
+
+echo "[+] Inserting module to run tests"
+stamp="$(date +%s)"
+insmod kbench9000.ko stamp="$stamp"
+
+echo "[+] Gathering results"
+dmesg | sed -n "s/.*kbench9000: $stamp: \\(.*\\)/\\x1b[37m\\x1b[44m\\x1b[1m\\1\\x1b[0m/p"
diff --git a/siphash.c b/siphash.c
new file mode 100644
index 0000000..ccbeb20
--- /dev/null
+++ b/siphash.c
@@ -0,0 +1,42 @@
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+/* One SipHash-style add-rotate-xor round over the four 64-bit words of s64. */
+#define SIPROUND \
+	do { \
+	s64[0] += s64[1]; s64[1] = rol64(s64[1], 13); s64[1] ^= s64[0]; s64[0] = rol64(s64[0], 32); \
+	s64[2] += s64[3]; s64[3] = rol64(s64[3], 16); s64[3] ^= s64[2]; \
+	s64[0] += s64[3]; s64[3] = rol64(s64[3], 21); s64[3] ^= s64[0]; \
+	s64[2] += s64[1]; s64[1] = rol64(s64[1], 17); s64[1] ^= s64[2]; s64[2] = rol64(s64[2], 32); \
+	} while (0)
+
+/*
+ * Absorb v (read as two 64-bit words) into the 128-bit state s.
+ *
+ * SIPROUND works on four 64-bit words but s is only 16 bytes, so s must not
+ * be indexed as u64[4]. The round state is instead built locally the way
+ * SipHash expands a 128-bit key (each half of s XORed with two of the four
+ * initialization constants) and folded back into s when done.
+ */
+void mix_siphash(u32 s[4], const u32 v[4])
+{
+	u64 k[2], v64[2], s64[4];
+	size_t i;
+
+	memcpy(k, s, sizeof(k));
+	memcpy(v64, v, sizeof(v64));
+	s64[0] = k[0] ^ 0x736f6d6570736575ULL;
+	s64[1] = k[1] ^ 0x646f72616e646f6dULL;
+	s64[2] = k[0] ^ 0x6c7967656e657261ULL;
+	s64[3] = k[1] ^ 0x7465646279746573ULL;
+
+	for (i = 0; i < 2; ++i) {
+		s64[3] ^= v64[i];
+		SIPROUND;
+		s64[0] ^= v64[i];
+	}
+
+	k[0] = s64[0] ^ s64[1];
+	k[1] = s64[2] ^ s64[3];
+	memcpy(s, k, sizeof(k));
+}
diff --git a/sparkle.c b/sparkle.c
new file mode 100644
index 0000000..55ef0d7
--- /dev/null
+++ b/sparkle.c
@@ -0,0 +1,65 @@
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+static const u32 r[] = {
+	0xB7E15162, 0xBF715880, 0x38B4DA56, 0x324E7738,
+	0xBB1185EB, 0x4F7C7B57, 0xCFBFA1C8, 0xC2B3293D
+};
+
+/*
+ * Permute the eight-word state in place: six steps, each adding step
+ * constants, running an ARX box over every word pair and then a linear layer.
+ */
+static inline void sparkle256(u32 *state)
+{
+	int i, j;
+	u32 rc, tmpx, tmpy, x0, y0;
+
+	for (i = 0; i < 6; i ++) {
+		state[1] ^= r[i];
+		state[3] ^= i;
+		for (j = 0; j < 8; j += 2) {
+			rc = r[j >> 1];
+			state[j] += ror32(state[j + 1], 31);
+			state[j + 1] ^= ror32(state[j], 24);
+			state[j] ^= rc;
+			state[j] += ror32(state[j + 1], 17);
+			state[j + 1] ^= ror32(state[j], 17);
+			state[j] ^= rc;
+			state[j] += state[j+1];
+			state[j + 1] ^= ror32(state[j], 31);
+			state[j] ^= rc;
+			state[j] += ror32(state[j + 1], 24);
+			state[j + 1] ^= ror32(state[j], 16);
+			state[j] ^= rc;
+		}
+		tmpx = x0 = state[0];
+		tmpy = y0 = state[1];
+		for (j = 2; j < 4; j += 2) {
+			tmpx ^= state[j];
+			tmpy ^= state[j + 1];
+		}
+		tmpx = ror32((tmpx ^ (tmpx << 16)), 16);
+		tmpy = ror32((tmpy ^ (tmpy << 16)), 16);
+		for (j = 2; j < 4; j += 2) {
+			state[j - 2] = state[j + 4] ^ state[j] ^ tmpy;
+			state[j + 4] = state[j];
+			state[j - 1] = state[j+ 4 + 1] ^ state[j + 1] ^ tmpx;
+			state[j + 4 + 1] = state[j + 1];
+		}
+		state[4 - 2] = state[4] ^ x0 ^ tmpy;
+		state[4] = x0;
+		state[4 - 1] = state[4 + 1] ^ y0 ^ tmpx;
+		state[4 + 1] = y0;
+	}
+}
+
+/* Permute the eight words h || v with sparkle256() and keep the first four as the new h. */
+void mix_sparkle(u32 h[4], const u32 v[4])
+{
+	u32 s[8];
+	memcpy(s, h, sizeof(u32) * 4);
+	memcpy(s + 4, v, sizeof(u32) * 4);
+	sparkle256(s);
+	memcpy(h, s, sizeof(u32) * 4);
+}
@@ -0,0 +1,31 @@
+#include <linux/kernel.h>
+
+#define R(x, y, k) (x = ror64(x, 8), x += y, x ^= k, y = rol64(y, 3), y ^= x)
+#define ROUNDS 32
+
+/* Encrypt the two-word block pt under key into ct, deriving each round key on the fly. */
+static inline void speck(u64 ct[2], const u64 pt[2], const u64 key[2])
+{
+	u64 y = pt[0], x = pt[1], b = key[0], a = key[1];
+	int i;
+
+	R(x, y, b);
+	for (i = 0; i < ROUNDS - 1; ++i) {
+		R(a, b, i);
+		R(x, y, b);
+	}
+	ct[0] = y;
+	ct[1] = x;
+}
+
+/* Encrypt h with speck() keyed by v and XOR the ciphertext into h. */
+void mix_speck(u32 h[4], const u32 v[4])
+{
+	u32 n[4];
+
+	speck((u64 *)n, (u64 *)h, (const u64 *)v);
+	h[0] ^= n[0];
+	h[1] ^= n[1];
+	h[2] ^= n[2];
+	h[3] ^= n[3];
+}
diff --git a/spelvin.c b/spelvin.c
new file mode 100644
index 0000000..a6f2100
--- /dev/null
+++ b/spelvin.c
@@ -0,0 +1,27 @@
+#include <linux/kernel.h>
+
+/* XOR v into h, run four add-rotate-xor rounds (rotations alternate 6/27 and 16/14), store back. */
+void mix_spelvin(u32 h[4], const u32 v[4])
+{
+	u32 a = h[0] ^ v[0], b = h[1] ^ v[1];
+	u32 c = h[2] ^ v[2], d = h[3] ^ v[3];
+
+	a += b; c += d;
+	b = rol32(b, 6); d = rol32(d, 27);
+	d ^= a; b ^= c;
+
+	a += b; c += d;
+	b = rol32(b, 16); d = rol32(d, 14);
+	d ^= a; b ^= c;
+
+	a += b; c += d;
+	b = rol32(b, 6); d = rol32(d, 27);
+	d ^= a; b ^= c;
+
+	a += b; c += d;
+	b = rol32(b, 16); d = rol32(d, 14);
+	d ^= a; b ^= c;
+
+	h[0] = a; h[1] = b;
+	h[2] = c; h[3] = d;
+}
|