aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile20
-rw-r--r--README.md26
-rw-r--r--hsiphash.c20
-rw-r--r--lfsr.c18
-rw-r--r--lfsr2.c24
-rw-r--r--main.c95
-rw-r--r--quarcha.c36
-rw-r--r--rotxor32.c9
-rw-r--r--rotxor64.c10
-rwxr-xr-xrun.sh35
-rw-r--r--siphash.c22
-rw-r--r--sparkle.c60
-rw-r--r--speck.c29
-rw-r--r--spelvin.c26
14 files changed, 430 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..3677112
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,20 @@
+# Two-stage Makefile: when KERNELRELEASE is set this file is being read by the
+# kernel build system and only describes the module; otherwise it was invoked
+# directly and re-runs make inside the kernel build tree given by KERNELDIR.
+ifneq ($(KERNELRELEASE),)
+# Link every .c file found in $(M) into the module, skipping *.mod.c files.
+kbench9000-y := $(sort $(patsubst %.c,%.o,$(filter-out %.mod.c,$(subst $(M)/,,$(wildcard $(M)/*.c)))))
+obj-m := kbench9000.o
+ccflags-y += -O3
+# Prefix every pr_*() message with the module name followed by ": ".
+ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
+else
+# Defaults to the build tree of the running kernel; may be overridden.
+KERNELDIR ?= /lib/modules/$(shell uname -r)/build
+PWD := $(shell pwd)
+
+default: build
+
+# Build the module, then run the benchmark script as root.
+run: build
+ sudo ./run.sh
+build:
+ $(MAKE) -C $(KERNELDIR) M=$(PWD)
+clean:
+ $(MAKE) -C $(KERNELDIR) M=$(PWD) clean
+.PHONY: default run build clean
+endif
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3fd3180
--- /dev/null
+++ b/README.md
@@ -0,0 +1,26 @@
+# kBench9000 – simple kernel land cycle counter
+### by [Jason A. Donenfeld](mailto:jason@zx2c4.com)
+
+This is a very simple kernel land cycle counter. To use, add a `.c` file that
+defines a `mix_<name>(u32 h[4], const u32 v[4])` function (every `.c` file in
+this directory is built into the module automatically), add `<name>` to the
+`forall` list in `main.c`, and then type:
+
+```
+$ make run
+```
+
+![Expected kBench9000 output](https://data.zx2c4.com/kbench9000-screenshot.png)
+
+### Kernel Toolchain
+
+You'll need to have a working kernel toolchain, usually achievable by:
+
+```
+$ sudo apt install linux-headers-$(uname -r) build-essential
+```
+
+or
+
+```
+$ sudo dnf install kernel-devel @development-tools
+```
diff --git a/hsiphash.c b/hsiphash.c
new file mode 100644
index 0000000..7c2263c
--- /dev/null
+++ b/hsiphash.c
@@ -0,0 +1,20 @@
+#include <linux/kernel.h>
+
+/*
+ * One HalfSipHash-style add/rotate/xor round over the four 32-bit state
+ * words. Statement order matters: each step consumes the word that was
+ * updated immediately before it.
+ */
+static inline void hsipround(u32 s[4])
+{
+	s[0] += s[1];
+	s[1] = rol32(s[1], 5) ^ s[0];
+	s[0] = rol32(s[0], 16);
+
+	s[2] += s[3];
+	s[3] = rol32(s[3], 8) ^ s[2];
+
+	s[0] += s[3];
+	s[3] = rol32(s[3], 7) ^ s[0];
+
+	s[2] += s[1];
+	s[1] = rol32(s[1], 13) ^ s[2];
+	s[2] = rol32(s[2], 16);
+}
+
+/*
+ * Absorb the four words of v into the state s, one word per round: the
+ * word is XORed into s[3] before the round and into s[0] after it.
+ */
+void mix_hsiphash(u32 s[4], const u32 v[4])
+{
+	const u32 *word;
+
+	for (word = v; word != v + 4; ++word) {
+		s[3] ^= *word;
+		hsipround(s);
+		s[0] ^= *word;
+	}
+}
diff --git a/lfsr.c b/lfsr.c
new file mode 100644
index 0000000..664760a
--- /dev/null
+++ b/lfsr.c
@@ -0,0 +1,18 @@
+#include <linux/kernel.h>
+
+/*
+ * Mix the four words of v into the state h, one word per step. Each step
+ * combines h[0], h[1], h[3] and the input word, scrambles the result with
+ * three shift-and-xor operations, and shifts it in at h[3] while the older
+ * words move down one slot (the previous h[0] is dropped).
+ */
+void mix_lfsr(u32 h[4], const u32 v[4])
+{
+	int step = 0;
+
+	while (step < 4) {
+		u32 fresh = h[0] ^ h[1] ^ h[3] ^ v[step++];
+
+		fresh ^= fresh << 17;
+		fresh ^= fresh >> 6;
+		fresh ^= fresh >> 9;
+
+		h[0] = h[1];
+		h[1] = h[2];
+		h[2] = h[3];
+		h[3] = fresh;
+	}
+}
diff --git a/lfsr2.c b/lfsr2.c
new file mode 100644
index 0000000..5c40bf9
--- /dev/null
+++ b/lfsr2.c
@@ -0,0 +1,24 @@
+#include <linux/kernel.h>
+
+/*
+ * Linear diffusion of a single word: three successive passes, each XORing
+ * the running value with two of its own rotations.
+ */
+static inline u32 lfsr2_spread(u32 x)
+{
+	x ^= rol32(x, 1) ^ rol32(x, 2);
+	x ^= rol32(x, 7) ^ rol32(x, 14);
+	x ^= rol32(x, 12) ^ rol32(x, 24);
+	return x;
+}
+
+/*
+ * Mix the four words of v into the state h, one word per step. The input
+ * word is XORed into h[0]; a new word is then derived from h[0], h[2] and
+ * h[3] and shifted in at h[3] while the other words move down one slot.
+ */
+void mix_lfsr2(u32 h[4], const u32 v[4])
+{
+	size_t step;
+
+	for (step = 0; step < 4; ++step) {
+		u32 head, fresh;
+
+		h[0] ^= v[step];
+		head = h[0];
+		fresh = lfsr2_spread(head) ^ (head << 1) ^ (h[2] << 2) ^
+			lfsr2_spread(h[3]);
+
+		h[0] = h[1];
+		h[1] = h[2];
+		h[2] = h[3];
+		h[3] = fresh;
+	}
+}
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..cab65b3
--- /dev/null
+++ b/main.c
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright (C) 2018-2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/random.h>
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+#include <asm/fpu/api.h>
+#include <asm/simd.h>
+
+/* Tag echoed in every result line; supplied by the loader as a module parameter. */
+static unsigned long stamp = 0;
+module_param(stamp, ulong, 0);
+
+/*
+ * Prototype for one mixer. Every mix_<name>() in this module is defined as
+ * returning void, so it is declared void here and no result is read from it.
+ */
+#define declare_it(name) void mix_ ##name(u32 h[4], const u32 v[4])
+
+/* Per-mixer local that receives the measured median. */
+#define local_it(name) cycles_t median_ ##name = 0
+
+/*
+ * Benchmark one mixer: warm up, execute cpuid, then take TRIALS + 1
+ * timestamps with one call after each. Adjacent timestamps are reduced to
+ * TRIALS deltas and only those TRIALS deltas are sorted; the final slot
+ * still holds a raw timestamp and must stay out of the sort.
+ */
+#define do_it(name) do { \
+	u32 eax = 0, ebx = 0, ecx = 0, edx = 0; \
+	for (i = 0; i < WARMUP; ++i) \
+		mix_ ##name(pool, input); \
+	asm volatile("cpuid" : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx)); \
+	for (i = 0; i <= TRIALS; ++i) { \
+		trial_times[i] = get_cycles(); \
+		mix_ ##name(pool, input); \
+	} \
+	for (i = 0; i < TRIALS; ++i) \
+		trial_times[i] = trial_times[i + 1] - trial_times[i]; \
+	sort(trial_times, TRIALS, sizeof(cycles_t), compare_cycles, NULL); \
+	median_ ## name = trial_times[TRIALS / 2]; \
+} while (0)
+
+/* Log one result line, tagged with the stamp parameter. */
+#define report_it(name) do { \
+	pr_err("%lu: %12s: %6llu cycles per call\n", stamp, #name, median_ ## name); \
+} while (0)
+
+/* Apply f to the name of every mixer linked into the module. */
+#define forall(f) \
+	f(spelvin); \
+	f(rotxor32); \
+	f(rotxor64); \
+	f(speck); \
+	f(quarcha); \
+	f(sparkle); \
+	f(siphash); \
+	f(hsiphash); \
+	f(lfsr); \
+	f(lfsr2);
+
+forall(declare_it)
+
+/*
+ * sort() comparator for cycles_t values. Compares explicitly instead of
+ * subtracting: a wide unsigned difference truncated to int can come out
+ * with the wrong sign.
+ */
+static int compare_cycles(const void *a, const void *b)
+{
+	const cycles_t x = *(const cycles_t *)a;
+	const cycles_t y = *(const cycles_t *)b;
+
+	return (x > y) - (x < y);
+}
+
+/*
+ * Runs every benchmark with interrupts disabled, logs the medians, and
+ * then deliberately fails so that the module is never left loaded.
+ * Returns -ENOMEM if the sample buffer cannot be allocated.
+ */
+static int __init mod_init(void)
+{
+	enum { WARMUP = 6000, TRIALS = 100000, IDLE = 1 * 1000 };
+	int i;
+	cycles_t *trial_times;
+	forall(local_it)
+	unsigned long flags;
+	DEFINE_SPINLOCK(lock);
+	/*
+	 * Several mixers access these buffers through u64 pointers, and
+	 * mix_siphash() touches four u64 words of state, so the pool is
+	 * 32 bytes and both buffers are aligned for u64 access.
+	 */
+	u32 pool[8] __aligned(8) = { 0 };
+	u32 input[4] __aligned(8);
+
+	get_random_bytes(input, sizeof(input));
+
+	/* One extra slot: TRIALS + 1 timestamps yield TRIALS deltas. */
+	trial_times = kcalloc(TRIALS + 1, sizeof(cycles_t), GFP_KERNEL);
+	if (!trial_times)
+		return -ENOMEM;
+
+	msleep(IDLE);
+	spin_lock_irqsave(&lock, flags);
+	forall(do_it)
+	spin_unlock_irqrestore(&lock, flags);
+	forall(report_it)
+	kfree(trial_times);
+
+	/* We should never actually agree to insert the module. Choosing
+	 * -0x1000 here is an amazing hack. It causes the kernel to not
+	 * actually load the module, while the standard userspace tools
+	 * don't return an error, because it's too big. */
+	return -0x1000;
+}
+
+module_init(mod_init);
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("kBench9000 Cycle Counter");
+MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
diff --git a/quarcha.c b/quarcha.c
new file mode 100644
index 0000000..5643a97
--- /dev/null
+++ b/quarcha.c
@@ -0,0 +1,36 @@
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+static const u32 rc[4] = {
+	/* expand 32-byte k */
+	0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U
+};
+
+/*
+ * One keyed quarter-round: the usual 16/12/8/7 add-rotate-xor sequence,
+ * with (r ^ key[n]) added into each of the four additions.
+ */
+static inline void quarter(u32 r, u32 *a, u32 *b, u32 *c, u32 *d,
+			   const u32 key[4])
+{
+	*a += *b + (r ^ key[0]);
+	*d = rol32(*d ^ *a, 16);
+	*c += *d + (r ^ key[1]);
+	*b = rol32(*b ^ *c, 12);
+	*a += *b + (r ^ key[2]);
+	*d = rol32(*d ^ *a, 8);
+	*c += *d + (r ^ key[3]);
+	*b = rol32(*b ^ *c, 7);
+}
+
+/* Four quarter-rounds over the block, each with a different word order. */
+static inline void blockcipher(u32 block[4], const u32 key[4])
+{
+	quarter(rc[0], &block[0], &block[1], &block[2], &block[3], key);
+	quarter(rc[1], &block[3], &block[2], &block[1], &block[0], key);
+	quarter(rc[2], &block[2], &block[3], &block[0], &block[1], key);
+	quarter(rc[3], &block[1], &block[0], &block[3], &block[2], key);
+}
+
+/*
+ * Run the block function over h with m as the key, then XOR the previous
+ * value of h back into the result.
+ */
+void mix_quarcha(u32 h[4], const u32 m[4])
+{
+	u32 saved[4];
+	size_t k;
+
+	memcpy(saved, h, sizeof(saved));
+	blockcipher(h, m);
+	for (k = 0; k < 4; ++k)
+		h[k] ^= saved[k];
+}
diff --git a/rotxor32.c b/rotxor32.c
new file mode 100644
index 0000000..5e37599
--- /dev/null
+++ b/rotxor32.c
@@ -0,0 +1,9 @@
+#include <linux/kernel.h>
+
+/* Rotate each 32-bit state word right by 7 and XOR in the matching input word. */
+void mix_rotxor32(u32 h[4], const u32 v[4])
+{
+	int lane;
+
+	for (lane = 0; lane < 4; ++lane)
+		h[lane] = v[lane] ^ ror32(h[lane], 7);
+}
diff --git a/rotxor64.c b/rotxor64.c
new file mode 100644
index 0000000..21acacf
--- /dev/null
+++ b/rotxor64.c
@@ -0,0 +1,10 @@
+#include <linux/kernel.h>
+
+/*
+ * Treat both buffers as two 64-bit words; rotate each state word right by
+ * 19 and XOR in the matching input word.
+ */
+void mix_rotxor64(u32 h[4], const u32 v[4])
+{
+	u64 *state = (u64 *)h;
+	const u64 *in = (const u64 *)v;
+	int half;
+
+	for (half = 0; half < 2; ++half)
+		state[half] = in[half] ^ ror64(state[half], 19);
+}
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..39c4719
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+set -e
+
+# Write status $1 to the "online" control file of every CPU that has one.
+nob_cpus() {
+	local state="$1" knob
+	echo "[+] Setting non-boot CPUs to status $state"
+	for knob in /sys/devices/system/cpu/*/online; do
+		echo "$state" > "$knob"
+	done
+}
+
+# Set the no-turbo state to $1: through intel_pstate when that knob exists,
+# otherwise by writing a fixed value to MSR 0x1a0 on all CPUs. Any status
+# other than 0 or 1 is a no-op on the MSR path.
+noturbo() {
+	echo "[+] Setting no-turbo to status $1"
+	local knob=/sys/devices/system/cpu/intel_pstate/no_turbo
+	if [[ -e $knob ]]; then
+		echo "$1" > "$knob"
+		return
+	fi
+	local val
+	case $1 in
+	0) val=0x850089 ;;
+	1) val=0x4000850089 ;;
+	*) return 0 ;;
+	esac
+	wrmsr -a 0x1a0 $val
+}
+
+# With set -e, this makes the script stop here if the module is not built.
+[[ -e kbench9000.ko ]]
+
+# On exit or interruption, set the CPUs back to 1 and no-turbo back to 0.
+trap "nob_cpus 1; noturbo 0;" INT TERM EXIT
+noturbo 1
+nob_cpus 0
+
+echo "[+] Inserting module to run tests"
+# The timestamp is passed to the module and tags this run's log lines.
+stamp="$(date +%s)"
+insmod kbench9000.ko stamp="$stamp"
+
+echo "[+] Gathering results"
+# Print only the lines carrying this run's stamp, wrapped in ANSI colour codes.
+dmesg | sed -n "s/.*kbench9000: $stamp: \\(.*\\)/\\x1b[37m\\x1b[44m\\x1b[1m\\1\\x1b[0m/p"
diff --git a/siphash.c b/siphash.c
new file mode 100644
index 0000000..ccbeb20
--- /dev/null
+++ b/siphash.c
@@ -0,0 +1,22 @@
+#include <linux/kernel.h>
+
+/*
+ * One SipHash-style add/rotate/xor round over four 64-bit state words.
+ * Statement order matters: each step consumes the word that was updated
+ * immediately before it.
+ */
+static inline void sipround(u64 q[4])
+{
+	q[0] += q[1];
+	q[1] = rol64(q[1], 13) ^ q[0];
+	q[0] = rol64(q[0], 32);
+
+	q[2] += q[3];
+	q[3] = rol64(q[3], 16) ^ q[2];
+
+	q[0] += q[3];
+	q[3] = rol64(q[3], 21) ^ q[0];
+
+	q[2] += q[1];
+	q[1] = rol64(q[1], 17) ^ q[2];
+	q[2] = rol64(q[2], 32);
+}
+
+/*
+ * Absorb v, viewed as two 64-bit words, into the state: each word is XORed
+ * into state word 3 before a round and into state word 0 after it.
+ *
+ * NOTE(review): the state is accessed as four u64 words (32 bytes) even
+ * though the parameter is declared as u32 s[4] (16 bytes). The caller must
+ * pass a buffer of at least 32 bytes that is suitably aligned for u64.
+ */
+void mix_siphash(u32 s[4], const u32 v[4])
+{
+	u64 *state = (u64 *)s;
+	const u64 *in = (const u64 *)v;
+	size_t k = 0;
+
+	do {
+		state[3] ^= in[k];
+		sipround(state);
+		state[0] ^= in[k];
+	} while (++k < 2);
+}
diff --git a/sparkle.c b/sparkle.c
new file mode 100644
index 0000000..55ef0d7
--- /dev/null
+++ b/sparkle.c
@@ -0,0 +1,60 @@
+#include <linux/kernel.h>
+#include <linux/string.h>
+
+/* Constants for sparkle256(): r[0..5] are used per step, r[0..3] per word pair. */
+static const u32 r[] = {
+ 0xB7E15162, 0xBF715880, 0x38B4DA56, 0x324E7738,
+ 0xBB1185EB, 0x4F7C7B57, 0xCFBFA1C8, 0xC2B3293D
+};
+
+/*
+ * In-place permutation of eight 32-bit words (state[0..7]), run for six steps.
+ * Each step XORs constants into state[1] and state[3], applies an
+ * add/rotate/xor sequence to each of the four (state[j], state[j + 1]) pairs,
+ * and then recombines and reorders the words.
+ * NOTE(review): the structure looks like the SPARKLE permutation with four
+ * branches -- confirm step count and constants against the reference.
+ */
+static inline void sparkle256(u32 *state)
+{
+ int i, j;
+ u32 rc, tmpx, tmpy, x0, y0;
+
+ for (i = 0; i < 6; i ++) {
+ /* Step-dependent constants. */
+ state[1] ^= r[i];
+ state[3] ^= i;
+ /* Add/rotate/xor layer: one pass per word pair, keyed by r[j / 2]. */
+ for (j = 0; j < 8; j += 2) {
+ rc = r[j >> 1];
+ state[j] += ror32(state[j + 1], 31);
+ state[j + 1] ^= ror32(state[j], 24);
+ state[j] ^= rc;
+ state[j] += ror32(state[j + 1], 17);
+ state[j + 1] ^= ror32(state[j], 17);
+ state[j] ^= rc;
+ state[j] += state[j+1];
+ state[j + 1] ^= ror32(state[j], 31);
+ state[j] ^= rc;
+ state[j] += ror32(state[j + 1], 24);
+ state[j + 1] ^= ror32(state[j], 16);
+ state[j] ^= rc;
+ }
+ /* Linear layer: fold words 0..3 into tmpx/tmpy (this loop runs once, j = 2). */
+ tmpx = x0 = state[0];
+ tmpy = y0 = state[1];
+ for (j = 2; j < 4; j += 2) {
+ tmpx ^= state[j];
+ tmpy ^= state[j + 1];
+ }
+ tmpx = ror32((tmpx ^ (tmpx << 16)), 16);
+ tmpy = ror32((tmpy ^ (tmpy << 16)), 16);
+ /* Mix words 6..7 with words 2..3 into slots 0..1 (this loop runs once, j = 2). */
+ for (j = 2; j < 4; j += 2) {
+ state[j - 2] = state[j + 4] ^ state[j] ^ tmpy;
+ state[j + 4] = state[j];
+ state[j - 1] = state[j+ 4 + 1] ^ state[j + 1] ^ tmpx;
+ state[j + 4 + 1] = state[j + 1];
+ }
+ /* Same for words 4..5 with the saved x0/y0, written to slots 2..3. */
+ state[4 - 2] = state[4] ^ x0 ^ tmpy;
+ state[4] = x0;
+ state[4 - 1] = state[4 + 1] ^ y0 ^ tmpx;
+ state[4 + 1] = y0;
+ }
+}
+
+/*
+ * Permute the eight-word block formed by h followed by v, then copy the
+ * first four words of the result back into h.
+ */
+void mix_sparkle(u32 h[4], const u32 v[4])
+{
+	enum { HALF_BYTES = 4 * sizeof(u32) };
+	u32 s[8];
+
+	memcpy(&s[0], h, HALF_BYTES);
+	memcpy(&s[4], v, HALF_BYTES);
+	sparkle256(s);
+	memcpy(h, &s[0], HALF_BYTES);
+}
diff --git a/speck.c b/speck.c
new file mode 100644
index 0000000..4b839e7
--- /dev/null
+++ b/speck.c
@@ -0,0 +1,29 @@
+#include <linux/kernel.h>
+
+enum { SPECK_ROUNDS = 32 };
+
+/* One round on the word pair (x, y) with round key k. */
+static inline void speck_round(u64 *x, u64 *y, u64 k)
+{
+	*x = ror64(*x, 8);
+	*x += *y;
+	*x ^= k;
+	*y = rol64(*y, 3);
+	*y ^= *x;
+}
+
+/*
+ * Encrypt the two-word block pt under the two-word key into ct. The key
+ * pair is advanced with the same round function, using the round index as
+ * its round key, before each data round after the first.
+ */
+static inline void speck(u64 ct[2], const u64 pt[2], const u64 key[2])
+{
+	u64 lo = pt[0], hi = pt[1];
+	u64 k_lo = key[0], k_hi = key[1];
+	int round;
+
+	speck_round(&hi, &lo, k_lo);
+	for (round = 0; round < SPECK_ROUNDS - 1; ++round) {
+		speck_round(&k_hi, &k_lo, round);
+		speck_round(&hi, &lo, k_lo);
+	}
+	ct[0] = lo;
+	ct[1] = hi;
+}
+
+/* Encrypt h with v as the key and XOR the ciphertext into h. */
+void mix_speck(u32 h[4], const u32 v[4])
+{
+	u32 out[4];
+	size_t k;
+
+	speck((u64 *)out, (u64 *)h, (const u64 *)v);
+	for (k = 0; k < 4; ++k)
+		h[k] ^= out[k];
+}
diff --git a/spelvin.c b/spelvin.c
new file mode 100644
index 0000000..a6f2100
--- /dev/null
+++ b/spelvin.c
@@ -0,0 +1,26 @@
+#include <linux/kernel.h>
+
+/*
+ * One round on the working words w[0..3]: two additions, two rotations
+ * (w[1] by rot_b, w[3] by rot_d), then two cross XORs.
+ */
+static inline void spelvin_round(u32 w[4], unsigned int rot_b, unsigned int rot_d)
+{
+	w[0] += w[1];
+	w[2] += w[3];
+	w[1] = rol32(w[1], rot_b);
+	w[3] = rol32(w[3], rot_d);
+	w[3] ^= w[0];
+	w[1] ^= w[2];
+}
+
+/*
+ * XOR v into a copy of h, run four rounds alternating the rotation pairs
+ * (6, 27) and (16, 14), and store the result back into h.
+ */
+void mix_spelvin(u32 h[4], const u32 v[4])
+{
+	u32 w[4];
+	int pass;
+
+	for (pass = 0; pass < 4; ++pass)
+		w[pass] = h[pass] ^ v[pass];
+
+	for (pass = 0; pass < 2; ++pass) {
+		spelvin_round(w, 6, 27);
+		spelvin_round(w, 16, 14);
+	}
+
+	for (pass = 0; pass < 4; ++pass)
+		h[pass] = w[pass];
+}