aboutsummaryrefslogtreecommitdiffstats
path: root/main.c
blob: a18d74478c20e5d84cbc3bcc5673b6d285800731 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
/* SPDX-License-Identifier: GPL-2.0
 *
 * Copyright (C) 2018-2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/sort.h>
#include <linux/random.h>
#include <linux/perf_event.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/fpu/api.h>
#include <asm/simd.h>

/*
 * Value supplied at module load time; report_it() prints it as the first
 * field of every result line. Statics are zero-initialized, so it is 0 when
 * the parameter is not given.
 */
static unsigned long stamp;
module_param(stamp, ulong, 0);

/*
 * Declares the prototype of generate_1k_<name>(): takes a 1024-byte output
 * buffer and returns bool. The definitions are not part of this file.
 */
#define declare_it(name) bool generate_1k_ ##name(u8 out[1024])

/* Defines a zero-initialized cycles_t variable named median_<name>. */
#define local_it(name) cycles_t median_ ##name = 0

/*
 * do_it(name) - time generate_1k_<name>() and store the median in
 * median_<name>.
 *
 * Must expand inside a function that provides: i, ret, out, pmc_index,
 * TRIALS, median_<name> and a trial_times array of at least TRIALS + 1
 * entries.
 *
 * Steps:
 *  1. Execute cpuid TRIALS times (presumably as a serializing warm-up;
 *     confirm the intent).
 *  2. Call the generator TRIALS times without timing it.
 *  3. Take TRIALS + 1 counter samples, calling the generator after each, so
 *     that consecutive samples bracket TRIALS timed calls.
 *  4. Turn the samples into TRIALS deltas, stored in trial_times[0..TRIALS-1].
 *  5. Sort the deltas and pick the element at index TRIALS / 2.
 *
 * Only the TRIALS deltas are sorted: trial_times[TRIALS] still holds a raw
 * counter sample after step 4 and must not take part in the median.
 */
#define do_it(name) do { \
	u32 eax = 0, ebx = 0, ecx = 0, edx = 0; \
	for (i = 0; i < TRIALS; ++i) \
		asm volatile("cpuid" : "+a" (eax), "=b" (ebx), "=d" (edx), "+c" (ecx)); \
	for (i = 0; i < TRIALS; ++i) \
		ret |= generate_1k_ ##name(out); \
	for (i = 0; i <= TRIALS; ++i) { \
		trial_times[i] = native_read_pmc(pmc_index); \
		ret |= generate_1k_ ##name(out); \
	} \
	for (i = 0; i < TRIALS; ++i) \
		trial_times[i] = trial_times[i + 1] - trial_times[i]; \
	sort(trial_times, TRIALS, sizeof(cycles_t), compare_cycles, NULL); \
	median_ ## name = trial_times[TRIALS / 2]; \
} while (0)

/*
 * Logs median_<name> at error level. Each line carries the `stamp` module
 * parameter, the stringified benchmark name, and the median cycle count.
 */
#define report_it(name) do { \
	pr_err("%lu: %12s: %6llu cycles per call\n", stamp, #name, median_ ## name); \
} while (0)

/*
 * Applies the macro f to every benchmarked generator name, in order:
 * rdrand, rdseed, kernel. Each application is followed by a semicolon, so
 * call sites write forall(f) without a trailing one.
 */
#define forall(f) \
	f(rdrand); \
	f(rdseed); \
	f(kernel);

/* Prototypes for generate_1k_rdrand(), generate_1k_rdseed(), generate_1k_kernel(). */
forall(declare_it)

static int compare_cycles(const void *a, const void *b)
{
	return *((cycles_t *)a) - *((cycles_t *)b);
}

/*
 * Module entry point: runs every benchmark once and logs the medians.
 *
 * A pinned, kernel-only CPU-cycles perf counter is created on the current
 * CPU, each generate_1k_*() variant is timed by do_it() with local
 * interrupts disabled, and the results are logged by report_it().
 *
 * Returns -ENOMEM if the sample buffer cannot be allocated; otherwise
 * always -0x1000 (see the comment at the end), including when the counter
 * could not be set up and nothing was measured.
 */
static int __init mod_init(void)
{
	enum { TRIALS = 30000 };
	int ret = 0, i;
	cycles_t *trial_times;
	unsigned long flags;
	struct perf_event *cycles_event;
	int pmc_index;
	int cpu;
	static struct perf_event_attr perf_cycles_attr = {
		.type = PERF_TYPE_HARDWARE,
		.config = PERF_COUNT_HW_CPU_CYCLES,
		.size = sizeof(struct perf_event_attr),
		.pinned = 1,
		.disabled = 0,
		.exclude_user = 1
	};
	DEFINE_SPINLOCK(lock);
	u8 out[1024];
	/* Kept last: forall() leaves a stray ';' that ends the declarations. */
	forall(local_it)

	/* One extra slot: TRIALS + 1 samples yield TRIALS deltas. */
	trial_times = kcalloc(TRIALS + 1, sizeof(*trial_times), GFP_KERNEL);
	if (!trial_times)
		return -ENOMEM;

	/* Presumably lets the system settle before measuring (confirm). */
	msleep(1000);

	/*
	 * Creating (and later releasing) a perf event allocates memory and
	 * takes mutexes, so it must happen while we are still allowed to
	 * sleep, i.e. outside the spinlocked, IRQ-disabled section below.
	 */
	cpu = raw_smp_processor_id();
	cycles_event = perf_event_create_kernel_counter(&perf_cycles_attr, cpu, NULL, NULL, NULL);
	if (IS_ERR(cycles_event)) {
		pr_err("unable to create perf counter: %ld\n", PTR_ERR(cycles_event));
		goto out_free;
	}

	spin_lock_irqsave(&lock, flags);
	/*
	 * The counter is bound to @cpu. If the task was migrated between
	 * creating it and disabling interrupts, reading the counter here
	 * would be meaningless, so give up instead of reporting garbage.
	 */
	if (smp_processor_id() != cpu) {
		spin_unlock_irqrestore(&lock, flags);
		pr_err("migrated away from cpu %d before measuring, try again\n", cpu);
		goto out_release;
	}
	/* Read the rdpmc index only once nothing else can run on this CPU. */
	pmc_index = cycles_event->hw.event_base_rdpmc;

	forall(do_it)

	spin_unlock_irqrestore(&lock, flags);

	/* Only reached when the benchmarks actually ran. */
	forall(report_it)

out_release:
	perf_event_release_kernel(cycles_event);
out_free:
	kfree(trial_times);

	/* We should never actually agree to insert the module. Choosing
	 * -0x1000 here is an amazing hack. It causes the kernel to not
	 * actually load the module, while the standard userspace tools
	 * don't return an error, because it's too big. */
	return -0x1000;
}

/* Register mod_init() as the module's init function and set module metadata. */
module_init(mod_init);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("kBench9000 Cycle Counter");
MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");