// Source: tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c
// SPDX-License-Identifier: GPL-2.0+
//
// Copyright 2022, Michael Ellerman, IBM Corp.
//
// Test that the 4PB address space SLB handling doesn't corrupt userspace registers
// (r9-r13) due to a SLB fault while saving the PPR.
//
// The bug was introduced in f384796c4 ("powerpc/mm: Add support for handling > 512TB
// address in SLB miss") and fixed in 4c2de74cc869 ("powerpc/64: Interrupts save PPR on
// stack rather than thread_struct").
//
// To hit the bug requires the task struct and kernel stack to be in different segments.
// Usually that requires more than 1TB of RAM, or if that's not practical, boot the kernel
// with "disable_1tb_segments".
//
// The test works by creating mappings above 512TB, to trigger the large address space
// support. It creates 64 mappings, double the size of the SLB, to cause SLB faults on
// each access (assuming naive replacement). It then loops over those mappings touching
// each, and checks that r9-r13 aren't corrupted.
//
// It then forks another child and tries again, because a new child process will get a new
// kernel stack and thread struct allocated, which may be more optimally placed to trigger
// the bug. It would probably be better to leave the previous child processes hanging
// around, so that kernel stack & thread struct allocations are not reused, but that would
// amount to a 30 second fork bomb. The current design reliably triggers the bug on
// unpatched kernels.

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include "utils.h"

#ifndef MAP_FIXED_NOREPLACE
#define MAP_FIXED_NOREPLACE MAP_FIXED // "Should be safe" above 512TB
#endif

#define BASE_ADDRESS (1ul << 50) // 1PB - well above the 512TB boundary
#define STRIDE	     (2ul << 40) // 2TB - puts each mapping in its own segment
#define SLB_SIZE     32          // number of SLB entries assumed by the test
#define NR_MAPPINGS  (SLB_SIZE * 2) // double the SLB size, to force faults on each access

// Set once the SIGALRM fires, ending the test's 30 second run; polled by
// the fork loop in test(). sig_atomic_t + volatile makes the handler-to-
// main-loop handoff safe.
static volatile sig_atomic_t signaled;

// SIGALRM handler: just record that the alarm went off.
static void signal_handler(int sig)
{
	(void)sig; // unused - only SIGALRM is routed here
	signaled = 1;
}

// Compare a register value read back after the SLB fault against the value
// saved before it; on mismatch, report and _exit(1) so the parent's waitpid
// sees a non-zero child status. str() (stringification) comes from utils.h.
// Wrapped in do { } while (0) so the macro expands as a single statement and
// is safe inside unbraced if/else (the bare `if { }` form was a dangling-else
// hazard).
#define CHECK_REG(_reg)                                                                \
	do {                                                                           \
		if (_reg != _reg##_orig) {                                             \
			printf(str(_reg) " corrupted! Expected 0x%lx != 0x%lx\n",      \
			       _reg##_orig, _reg);                                     \
			_exit(1);                                                      \
		}                                                                      \
	} while (0)

// Runs in a forked child: store to each of the NR_MAPPINGS pages in turn.
// Because the mappings outnumber the SLB entries (NR_MAPPINGS == 2 * SLB_SIZE)
// and are a segment apart, each store should take an SLB fault (assuming
// naive replacement). Around each store we snapshot r9-r13 and re-read them
// afterwards; CHECK_REG _exit(1)s the child on any mismatch. Returns 0 on
// success, which becomes the child's exit status.
static int touch_mappings(void)
{
	unsigned long r9_orig, r10_orig, r11_orig, r12_orig, r13_orig;
	unsigned long r9, r10, r11, r12, r13;
	unsigned long addr, *p;
	int i;

	for (i = 0; i < NR_MAPPINGS; i++) {
		// Mappings are STRIDE (2TB) apart, starting at BASE_ADDRESS (1PB).
		addr = BASE_ADDRESS + (i * STRIDE);
		p = (unsigned long *)addr;

		// %10 is the loop counter i (value stored), %11 is the target
		// address p. Outputs are "=&b" (early-clobber GPRs) so none of
		// the temporaries alias r9-r13, which are listed as clobbers.
		asm volatile("mr   %0, %%r9	;" // Read original GPR values
			     "mr   %1, %%r10	;"
			     "mr   %2, %%r11	;"
			     "mr   %3, %%r12	;"
			     "mr   %4, %%r13	;"
			     "std %10, 0(%11)   ;" // Trigger SLB fault
			     "mr   %5, %%r9	;" // Save possibly corrupted values
			     "mr   %6, %%r10	;"
			     "mr   %7, %%r11	;"
			     "mr   %8, %%r12	;"
			     "mr   %9, %%r13	;"
			     "mr   %%r9,  %0	;" // Restore original values
			     "mr   %%r10, %1	;"
			     "mr   %%r11, %2	;"
			     "mr   %%r12, %3	;"
			     "mr   %%r13, %4	;"
			     : "=&b"(r9_orig), "=&b"(r10_orig), "=&b"(r11_orig),
			       "=&b"(r12_orig), "=&b"(r13_orig), "=&b"(r9), "=&b"(r10),
			       "=&b"(r11), "=&b"(r12), "=&b"(r13)
			     : "b"(i), "b"(p)
			     : "r9", "r10", "r11", "r12", "r13");

		// Any mismatch means the kernel's fault path corrupted the GPR.
		CHECK_REG(r9);
		CHECK_REG(r10);
		CHECK_REG(r11);
		CHECK_REG(r12);
		CHECK_REG(r13);
	}

	return 0;
}

// Main test body. Creates the >512TB mappings, then repeatedly forks
// children that run touch_mappings(), so each attempt gets a freshly
// allocated kernel stack & thread struct (see file header). Runs until the
// 30s alarm fires. Returns 0 on pass, non-zero on failure; skips on
// non-hash MMUs.
static int test(void)
{
	unsigned long page_size, addr, *p;
	struct sigaction action;
	bool hash_mmu;
	int i, status;
	pid_t pid;

	// This tests a hash MMU specific bug.
	FAIL_IF(using_hash_mmu(&hash_mmu));
	SKIP_IF(!hash_mmu);

	page_size = sysconf(_SC_PAGESIZE);

	// Create NR_MAPPINGS one-page mappings, STRIDE apart, starting above
	// 512TB to exercise the large address space support.
	for (i = 0; i < NR_MAPPINGS; i++) {
		addr = BASE_ADDRESS + (i * STRIDE);

		p = mmap((void *)addr, page_size, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			printf("Error: couldn't mmap(), confirm kernel has 4PB support?\n");
			return 1;
		}
	}

	// Initialise all portable sigaction fields - sa_mask was previously
	// left indeterminate on the stack, so sigaction() could install a
	// garbage signal mask for the handler.
	action.sa_handler = signal_handler;
	action.sa_flags = SA_RESTART;
	FAIL_IF(sigemptyset(&action.sa_mask) != 0);
	FAIL_IF(sigaction(SIGALRM, &action, NULL) < 0);

	// Seen to always crash in under ~10s on affected kernels.
	alarm(30);

	while (!signaled) {
		// Fork new processes, to increase the chance that we hit the case where
		// the kernel stack and task struct are in different segments.
		pid = fork();
		FAIL_IF(pid == -1); // fork failure previously went undiagnosed
		if (pid == 0)
			exit(touch_mappings());

		// The child must exit normally with status 0; a signal or a
		// non-zero exit means it crashed or saw GPR corruption.
		FAIL_IF(waitpid(-1, &status, 0) == -1);
		FAIL_IF(WIFSIGNALED(status));
		FAIL_IF(!WIFEXITED(status));
		FAIL_IF(WEXITSTATUS(status));
	}

	return 0;
}

// Entry point: hand the test body to the selftest harness (from utils.h),
// which deals with timeouts and result reporting, and return its verdict.
int main(void)
{
	int rc;

	rc = test_harness(test, "large_vm_gpr_corruption");
	return rc;
}