aboutsummaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests/x86
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--tools/testing/selftests/x86/.gitignore2
-rw-r--r--tools/testing/selftests/x86/Makefile21
-rw-r--r--tools/testing/selftests/x86/amx.c853
-rwxr-xr-xtools/testing/selftests/x86/check_cc.sh2
-rw-r--r--tools/testing/selftests/x86/corrupt_xstate_header.c102
-rw-r--r--tools/testing/selftests/x86/fsgsbase.c108
-rw-r--r--tools/testing/selftests/x86/fsgsbase_restore.c245
-rw-r--r--tools/testing/selftests/x86/helpers.h25
-rw-r--r--tools/testing/selftests/x86/iopl.c78
-rw-r--r--tools/testing/selftests/x86/ldt_gdt.c2
-rw-r--r--tools/testing/selftests/x86/mov_ss_trap.c4
-rw-r--r--tools/testing/selftests/x86/pkey-helpers.h219
-rw-r--r--tools/testing/selftests/x86/protection_keys.c1506
-rw-r--r--tools/testing/selftests/x86/ptrace_syscall.c8
-rw-r--r--tools/testing/selftests/x86/raw_syscall_helper_32.S2
-rw-r--r--tools/testing/selftests/x86/sigaltstack.c128
-rw-r--r--tools/testing/selftests/x86/sigreturn.c7
-rw-r--r--tools/testing/selftests/x86/single_step_syscall.c21
-rw-r--r--tools/testing/selftests/x86/syscall_arg_fault.c54
-rw-r--r--tools/testing/selftests/x86/syscall_nt.c47
-rw-r--r--tools/testing/selftests/x86/syscall_numbering.c491
-rw-r--r--tools/testing/selftests/x86/test_vdso.c337
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c39
-rw-r--r--tools/testing/selftests/x86/thunks.S2
-rw-r--r--tools/testing/selftests/x86/thunks_32.S2
-rw-r--r--tools/testing/selftests/x86/unwind_vdso.c23
-rw-r--r--tools/testing/selftests/x86/vdso_restorer.c15
27 files changed, 2089 insertions, 2254 deletions
diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore
index 7757f73ff9a3..1aaef5bf119a 100644
--- a/tools/testing/selftests/x86/.gitignore
+++ b/tools/testing/selftests/x86/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
*_32
*_64
single_step_syscall
@@ -11,5 +12,4 @@ ldt_gdt
iopl
mpx-mini-test
ioperm
-protection_keys
test_vdso
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 5d49bfec1e9a..0388c4d60af0 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -6,18 +6,19 @@ include ../lib.mk
.PHONY: all all_32 all_64 warn_32bit_failure clean
UNAME_M := $(shell uname -m)
-CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
-CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
+CAN_BUILD_I386 := $(shell ./check_cc.sh "$(CC)" trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh "$(CC)" trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl ioperm \
- protection_keys test_vdso test_vsyscall mov_ss_trap \
- syscall_arg_fault
+ test_vsyscall mov_ss_trap \
+ syscall_arg_fault fsgsbase_restore sigaltstack
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
+ corrupt_xstate_header amx
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
@@ -70,10 +71,10 @@ all_64: $(BINARIES_64)
EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64)
-$(BINARIES_32): $(OUTPUT)/%_32: %.c
+$(BINARIES_32): $(OUTPUT)/%_32: %.c helpers.h
$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm
-$(BINARIES_64): $(OUTPUT)/%_64: %.c
+$(BINARIES_64): $(OUTPUT)/%_64: %.c helpers.h
$(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
# x86_64 users should be encouraged to install 32-bit libraries
@@ -91,6 +92,10 @@ warn_32bit_failure:
echo "If you are using a Fedora-like distribution, try:"; \
echo ""; \
echo " yum install glibc-devel.*i686"; \
+ echo ""; \
+ echo "If you are using a SUSE-like distribution, try:"; \
+ echo ""; \
+ echo " zypper install gcc-32bit glibc-devel-static-32bit"; \
exit 0;
endif
diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c
new file mode 100644
index 000000000000..625e42901237
--- /dev/null
+++ b/tools/testing/selftests/x86/amx.c
@@ -0,0 +1,853 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <errno.h>
+#include <pthread.h>
+#include <setjmp.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <x86intrin.h>
+
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/shm.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h" /* For __cpuid_count() */
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+#define XSAVE_HDR_OFFSET 512
+#define XSAVE_HDR_SIZE 64
+
+struct xsave_buffer {
+ union {
+ struct {
+ char legacy[XSAVE_HDR_OFFSET];
+ char header[XSAVE_HDR_SIZE];
+ char extended[0];
+ };
+ char bytes[0];
+ };
+};
+
+static inline uint64_t xgetbv(uint32_t index)
+{
+ uint32_t eax, edx;
+
+ asm volatile("xgetbv;"
+ : "=a" (eax), "=d" (edx)
+ : "c" (index));
+ return eax + ((uint64_t)edx << 32);
+}
+
+static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
+{
+ uint32_t rfbm_lo = rfbm;
+ uint32_t rfbm_hi = rfbm >> 32;
+
+ asm volatile("xsave (%%rdi)"
+ : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)
+ : "memory");
+}
+
+static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm)
+{
+ uint32_t rfbm_lo = rfbm;
+ uint32_t rfbm_hi = rfbm >> 32;
+
+ asm volatile("xrstor (%%rdi)"
+ : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi));
+}
+
+/* err() exits and will not return */
+#define fatal_error(msg, ...) err(1, "[FAIL]\t" msg, ##__VA_ARGS__)
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ fatal_error("sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ fatal_error("sigaction");
+}
+
+#define XFEATURE_XTILECFG 17
+#define XFEATURE_XTILEDATA 18
+#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG)
+#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA)
+#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)
+
+#define CPUID_LEAF1_ECX_XSAVE_MASK (1 << 26)
+#define CPUID_LEAF1_ECX_OSXSAVE_MASK (1 << 27)
+static inline void check_cpuid_xsave(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ /*
+ * CPUID.1:ECX.XSAVE[bit 26] enumerates general
+ * support for the XSAVE feature set, including
+ * XGETBV.
+ */
+ __cpuid_count(1, 0, eax, ebx, ecx, edx);
+ if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK))
+ fatal_error("cpuid: no CPU xsave support");
+ if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK))
+ fatal_error("cpuid: no OS xsave support");
+}
+
+static uint32_t xbuf_size;
+
+static struct {
+ uint32_t xbuf_offset;
+ uint32_t size;
+} xtiledata;
+
+#define CPUID_LEAF_XSTATE 0xd
+#define CPUID_SUBLEAF_XSTATE_USER 0x0
+#define TILE_CPUID 0x1d
+#define TILE_PALETTE_ID 0x1
+
+static void check_cpuid_xtiledata(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ __cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER,
+ eax, ebx, ecx, edx);
+
+ /*
+ * EBX enumerates the size (in bytes) required by the XSAVE
+ * instruction for an XSAVE area containing all the user state
+ * components corresponding to bits currently set in XCR0.
+ *
+ * Stash that off so it can be used to allocate buffers later.
+ */
+ xbuf_size = ebx;
+
+ __cpuid_count(CPUID_LEAF_XSTATE, XFEATURE_XTILEDATA,
+ eax, ebx, ecx, edx);
+ /*
+ * eax: XTILEDATA state component size
+ * ebx: XTILEDATA state component offset in user buffer
+ */
+ if (!eax || !ebx)
+ fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
+ eax, ebx);
+
+ xtiledata.size = eax;
+ xtiledata.xbuf_offset = ebx;
+}
+
+/* The helpers for managing XSAVE buffer and tile states: */
+
+struct xsave_buffer *alloc_xbuf(void)
+{
+ struct xsave_buffer *xbuf;
+
+ /* XSAVE buffer should be 64B-aligned. */
+ xbuf = aligned_alloc(64, xbuf_size);
+ if (!xbuf)
+ fatal_error("aligned_alloc()");
+ return xbuf;
+}
+
+static inline void clear_xstate_header(struct xsave_buffer *buffer)
+{
+ memset(&buffer->header, 0, sizeof(buffer->header));
+}
+
+static inline uint64_t get_xstatebv(struct xsave_buffer *buffer)
+{
+ /* XSTATE_BV is at the beginning of the header: */
+ return *(uint64_t *)&buffer->header;
+}
+
+static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv)
+{
+ /* XSTATE_BV is at the beginning of the header: */
+ *(uint64_t *)(&buffer->header) = bv;
+}
+
+static void set_rand_tiledata(struct xsave_buffer *xbuf)
+{
+ int *ptr = (int *)&xbuf->bytes[xtiledata.xbuf_offset];
+ int data;
+ int i;
+
+ /*
+ * Ensure that 'data' is never 0. This ensures that
+ * the registers are never in their initial configuration
+ * and thus never tracked as being in the init state.
+ */
+ data = rand() | 1;
+
+ for (i = 0; i < xtiledata.size / sizeof(int); i++, ptr++)
+ *ptr = data;
+}
+
+struct xsave_buffer *stashed_xsave;
+
+static void init_stashed_xsave(void)
+{
+ stashed_xsave = alloc_xbuf();
+ if (!stashed_xsave)
+ fatal_error("failed to allocate stashed_xsave\n");
+ clear_xstate_header(stashed_xsave);
+}
+
+static void free_stashed_xsave(void)
+{
+ free(stashed_xsave);
+}
+
+/* See 'struct _fpx_sw_bytes' at sigcontext.h */
+#define SW_BYTES_OFFSET 464
+/* N.B. The struct's field name varies so read from the offset. */
+#define SW_BYTES_BV_OFFSET (SW_BYTES_OFFSET + 8)
+
+static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *buffer)
+{
+ return (struct _fpx_sw_bytes *)(buffer + SW_BYTES_OFFSET);
+}
+
+static inline uint64_t get_fpx_sw_bytes_features(void *buffer)
+{
+ return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET);
+}
+
+/* Work around printf() being unsafe in signals: */
+#define SIGNAL_BUF_LEN 1000
+char signal_message_buffer[SIGNAL_BUF_LEN];
+void sig_print(char *msg)
+{
+ int left = SIGNAL_BUF_LEN - strlen(signal_message_buffer) - 1;
+
+ strncat(signal_message_buffer, msg, left);
+}
+
+static volatile bool noperm_signaled;
+static int noperm_errs;
+/*
+ * Signal handler for when AMX is used but
+ * permission has not been obtained.
+ */
+static void handle_noperm(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+ void *xbuf = ctx->uc_mcontext.fpregs;
+ struct _fpx_sw_bytes *sw_bytes;
+ uint64_t features;
+
+ /* Reset the signal message buffer: */
+ signal_message_buffer[0] = '\0';
+ sig_print("\tAt SIGILL handler,\n");
+
+ if (si->si_code != ILL_ILLOPC) {
+ noperm_errs++;
+ sig_print("[FAIL]\tInvalid signal code.\n");
+ } else {
+ sig_print("[OK]\tValid signal code (ILL_ILLOPC).\n");
+ }
+
+ sw_bytes = get_fpx_sw_bytes(xbuf);
+ /*
+ * Without permission, the signal XSAVE buffer should not
+ * have room for AMX register state (aka. xtiledata).
+ * Check that the size does not overlap with where xtiledata
+ * will reside.
+ *
+ * This also implies that no state components *PAST*
+ * XTILEDATA (features >=19) can be present in the buffer.
+ */
+ if (sw_bytes->xstate_size <= xtiledata.xbuf_offset) {
+ sig_print("[OK]\tValid xstate size\n");
+ } else {
+ noperm_errs++;
+ sig_print("[FAIL]\tInvalid xstate size\n");
+ }
+
+ features = get_fpx_sw_bytes_features(xbuf);
+ /*
+ * Without permission, the XTILEDATA feature
+ * bit should not be set.
+ */
+ if ((features & XFEATURE_MASK_XTILEDATA) == 0) {
+ sig_print("[OK]\tValid xstate mask\n");
+ } else {
+ noperm_errs++;
+ sig_print("[FAIL]\tInvalid xstate mask\n");
+ }
+
+ noperm_signaled = true;
+ ctx->uc_mcontext.gregs[REG_RIP] += 3; /* Skip the faulting XRSTOR */
+}
+
+/* Return true if XRSTOR is successful; otherwise, false. */
+static inline bool xrstor_safe(struct xsave_buffer *xbuf, uint64_t mask)
+{
+ noperm_signaled = false;
+ xrstor(xbuf, mask);
+
+ /* Print any messages produced by the signal code: */
+ printf("%s", signal_message_buffer);
+ /*
+ * Reset the buffer to make sure any future printing
+ * only outputs new messages:
+ */
+ signal_message_buffer[0] = '\0';
+
+ if (noperm_errs)
+ fatal_error("saw %d errors in noperm signal handler\n", noperm_errs);
+
+ return !noperm_signaled;
+}
+
+/*
+ * Use XRSTOR to populate the XTILEDATA registers with
+ * random data.
+ *
+ * Return true if successful; otherwise, false.
+ */
+static inline bool load_rand_tiledata(struct xsave_buffer *xbuf)
+{
+ clear_xstate_header(xbuf);
+ set_xstatebv(xbuf, XFEATURE_MASK_XTILEDATA);
+ set_rand_tiledata(xbuf);
+ return xrstor_safe(xbuf, XFEATURE_MASK_XTILEDATA);
+}
+
+/* Return XTILEDATA to its initial configuration. */
+static inline void init_xtiledata(void)
+{
+ clear_xstate_header(stashed_xsave);
+ xrstor_safe(stashed_xsave, XFEATURE_MASK_XTILEDATA);
+}
+
+enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED };
+
+/* arch_prctl() and sigaltstack() test */
+
+#define ARCH_GET_XCOMP_PERM 0x1022
+#define ARCH_REQ_XCOMP_PERM 0x1023
+
+static void req_xtiledata_perm(void)
+{
+ syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA);
+}
+
+static void validate_req_xcomp_perm(enum expected_result exp)
+{
+ unsigned long bitmask, expected_bitmask;
+ long rc;
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask);
+ if (rc) {
+ fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc);
+ } else if (!(bitmask & XFEATURE_MASK_XTILECFG)) {
+ fatal_error("ARCH_GET_XCOMP_PERM returns XFEATURE_XTILECFG off.");
+ }
+
+ rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA);
+ if (exp == FAIL_EXPECTED) {
+ if (rc) {
+ printf("[OK]\tARCH_REQ_XCOMP_PERM saw expected failure..\n");
+ return;
+ }
+
+ fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected success.\n");
+ } else if (rc) {
+ fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected failure.\n");
+ }
+
+ expected_bitmask = bitmask | XFEATURE_MASK_XTILEDATA;
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask);
+ if (rc) {
+ fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc);
+ } else if (bitmask != expected_bitmask) {
+ fatal_error("ARCH_REQ_XCOMP_PERM set a wrong bitmask: %lx, expected: %lx.\n",
+ bitmask, expected_bitmask);
+ } else {
+ printf("\tARCH_REQ_XCOMP_PERM is successful.\n");
+ }
+}
+
+static void validate_xcomp_perm(enum expected_result exp)
+{
+ bool load_success = load_rand_tiledata(stashed_xsave);
+
+ if (exp == FAIL_EXPECTED) {
+ if (load_success) {
+ noperm_errs++;
+ printf("[FAIL]\tLoad tiledata succeeded.\n");
+ } else {
+ printf("[OK]\tLoad tiledata failed.\n");
+ }
+ } else if (exp == SUCCESS_EXPECTED) {
+ if (load_success) {
+ printf("[OK]\tLoad tiledata succeeded.\n");
+ } else {
+ noperm_errs++;
+ printf("[FAIL]\tLoad tiledata failed.\n");
+ }
+ }
+}
+
+#ifndef AT_MINSIGSTKSZ
+# define AT_MINSIGSTKSZ 51
+#endif
+
+static void *alloc_altstack(unsigned int size)
+{
+ void *altstack;
+
+ altstack = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+
+ if (altstack == MAP_FAILED)
+ fatal_error("mmap() for altstack");
+
+ return altstack;
+}
+
+static void setup_altstack(void *addr, unsigned long size, enum expected_result exp)
+{
+ stack_t ss;
+ int rc;
+
+ memset(&ss, 0, sizeof(ss));
+ ss.ss_size = size;
+ ss.ss_sp = addr;
+
+ rc = sigaltstack(&ss, NULL);
+
+ if (exp == FAIL_EXPECTED) {
+ if (rc) {
+ printf("[OK]\tsigaltstack() failed.\n");
+ } else {
+ fatal_error("sigaltstack() succeeded unexpectedly.\n");
+ }
+ } else if (rc) {
+ fatal_error("sigaltstack()");
+ }
+}
+
+static void test_dynamic_sigaltstack(void)
+{
+ unsigned int small_size, enough_size;
+ unsigned long minsigstksz;
+ void *altstack;
+
+ minsigstksz = getauxval(AT_MINSIGSTKSZ);
+ printf("\tAT_MINSIGSTKSZ = %lu\n", minsigstksz);
+ /*
+ * getauxval() itself can return 0 for failure or
+ * success. But, in this case, AT_MINSIGSTKSZ
+ * will always return a >=0 value if implemented.
+ * Just check for 0.
+ */
+ if (minsigstksz == 0) {
+ printf("no support for AT_MINSIGSTKSZ, skipping sigaltstack tests\n");
+ return;
+ }
+
+ enough_size = minsigstksz * 2;
+
+ altstack = alloc_altstack(enough_size);
+ printf("\tAllocate memory for altstack (%u bytes).\n", enough_size);
+
+ /*
+ * Try setup_altstack() with a size which can not fit
+ * XTILEDATA. ARCH_REQ_XCOMP_PERM should fail.
+ */
+ small_size = minsigstksz - xtiledata.size;
+ printf("\tAfter sigaltstack() with small size (%u bytes).\n", small_size);
+ setup_altstack(altstack, small_size, SUCCESS_EXPECTED);
+ validate_req_xcomp_perm(FAIL_EXPECTED);
+
+ /*
+ * Try setup_altstack() with a size derived from
+ * AT_MINSIGSTKSZ. It should be more than large enough
+ * and thus ARCH_REQ_XCOMP_PERM should succeed.
+ */
+ printf("\tAfter sigaltstack() with enough size (%u bytes).\n", enough_size);
+ setup_altstack(altstack, enough_size, SUCCESS_EXPECTED);
+ validate_req_xcomp_perm(SUCCESS_EXPECTED);
+
+ /*
+ * Try to coerce setup_altstack() to again accept a
+ * too-small altstack. This ensures that big-enough
+ * sigaltstacks can not shrink to a too-small value
+ * once XTILEDATA permission is established.
+ */
+ printf("\tThen, sigaltstack() with small size (%u bytes).\n", small_size);
+ setup_altstack(altstack, small_size, FAIL_EXPECTED);
+}
+
+static void test_dynamic_state(void)
+{
+ pid_t parent, child, grandchild;
+
+ parent = fork();
+ if (parent < 0) {
+ /* fork() failed */
+ fatal_error("fork");
+ } else if (parent > 0) {
+ int status;
+ /* fork() succeeded. Now in the parent. */
+
+ wait(&status);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ fatal_error("arch_prctl test parent exit");
+ return;
+ }
+ /* fork() succeeded. Now in the child . */
+
+ printf("[RUN]\tCheck ARCH_REQ_XCOMP_PERM around process fork() and sigaltack() test.\n");
+
+ printf("\tFork a child.\n");
+ child = fork();
+ if (child < 0) {
+ fatal_error("fork");
+ } else if (child > 0) {
+ int status;
+
+ wait(&status);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ fatal_error("arch_prctl test child exit");
+ _exit(0);
+ }
+
+ /*
+ * The permission request should fail without an
+ * XTILEDATA-compatible signal stack
+ */
+ printf("\tTest XCOMP_PERM at child.\n");
+ validate_xcomp_perm(FAIL_EXPECTED);
+
+ /*
+ * Set up an XTILEDATA-compatible signal stack and
+ * also obtain permission to populate XTILEDATA.
+ */
+ printf("\tTest dynamic sigaltstack at child:\n");
+ test_dynamic_sigaltstack();
+
+ /* Ensure that XTILEDATA can be populated. */
+ printf("\tTest XCOMP_PERM again at child.\n");
+ validate_xcomp_perm(SUCCESS_EXPECTED);
+
+ printf("\tFork a grandchild.\n");
+ grandchild = fork();
+ if (grandchild < 0) {
+ /* fork() failed */
+ fatal_error("fork");
+ } else if (!grandchild) {
+ /* fork() succeeded. Now in the (grand)child. */
+ printf("\tTest XCOMP_PERM at grandchild.\n");
+
+ /*
+ * Ensure that the grandchild inherited
+ * permission and a compatible sigaltstack:
+ */
+ validate_xcomp_perm(SUCCESS_EXPECTED);
+ } else {
+ int status;
+ /* fork() succeeded. Now in the parent. */
+
+ wait(&status);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ fatal_error("fork test grandchild");
+ }
+
+ _exit(0);
+}
+
+/*
+ * Save current register state and compare it to @xbuf1.'
+ *
+ * Returns false if @xbuf1 matches the registers.
+ * Returns true if @xbuf1 differs from the registers.
+ */
+static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1)
+{
+ struct xsave_buffer *xbuf2;
+ int ret;
+
+ xbuf2 = alloc_xbuf();
+ if (!xbuf2)
+ fatal_error("failed to allocate XSAVE buffer\n");
+
+ xsave(xbuf2, XFEATURE_MASK_XTILEDATA);
+ ret = memcmp(&xbuf1->bytes[xtiledata.xbuf_offset],
+ &xbuf2->bytes[xtiledata.xbuf_offset],
+ xtiledata.size);
+
+ free(xbuf2);
+
+ if (ret == 0)
+ return false;
+ return true;
+}
+
+static inline void validate_tiledata_regs_same(struct xsave_buffer *xbuf)
+{
+ int ret = __validate_tiledata_regs(xbuf);
+
+ if (ret != 0)
+ fatal_error("TILEDATA registers changed");
+}
+
+static inline void validate_tiledata_regs_changed(struct xsave_buffer *xbuf)
+{
+ int ret = __validate_tiledata_regs(xbuf);
+
+ if (ret == 0)
+ fatal_error("TILEDATA registers did not change");
+}
+
+/* tiledata inheritance test */
+
+static void test_fork(void)
+{
+ pid_t child, grandchild;
+
+ child = fork();
+ if (child < 0) {
+ /* fork() failed */
+ fatal_error("fork");
+ } else if (child > 0) {
+ /* fork() succeeded. Now in the parent. */
+ int status;
+
+ wait(&status);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ fatal_error("fork test child");
+ return;
+ }
+ /* fork() succeeded. Now in the child. */
+ printf("[RUN]\tCheck tile data inheritance.\n\tBefore fork(), load tiledata\n");
+
+ load_rand_tiledata(stashed_xsave);
+
+ grandchild = fork();
+ if (grandchild < 0) {
+ /* fork() failed */
+ fatal_error("fork");
+ } else if (grandchild > 0) {
+ /* fork() succeeded. Still in the first child. */
+ int status;
+
+ wait(&status);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ fatal_error("fork test grand child");
+ _exit(0);
+ }
+ /* fork() succeeded. Now in the (grand)child. */
+
+ /*
+ * TILEDATA registers are not preserved across fork().
+ * Ensure that their value has changed:
+ */
+ validate_tiledata_regs_changed(stashed_xsave);
+
+ _exit(0);
+}
+
+/* Context switching test */
+
+static struct _ctxtswtest_cfg {
+ unsigned int iterations;
+ unsigned int num_threads;
+} ctxtswtest_config;
+
+struct futex_info {
+ pthread_t thread;
+ int nr;
+ pthread_mutex_t mutex;
+ struct futex_info *next;
+};
+
+static void *check_tiledata(void *info)
+{
+ struct futex_info *finfo = (struct futex_info *)info;
+ struct xsave_buffer *xbuf;
+ int i;
+
+ xbuf = alloc_xbuf();
+ if (!xbuf)
+ fatal_error("unable to allocate XSAVE buffer");
+
+ /*
+ * Load random data into 'xbuf' and then restore
+ * it to the tile registers themselves.
+ */
+ load_rand_tiledata(xbuf);
+ for (i = 0; i < ctxtswtest_config.iterations; i++) {
+ pthread_mutex_lock(&finfo->mutex);
+
+ /*
+ * Ensure the register values have not
+ * diverged from those recorded in 'xbuf'.
+ */
+ validate_tiledata_regs_same(xbuf);
+
+ /* Load new, random values into xbuf and registers */
+ load_rand_tiledata(xbuf);
+
+ /*
+ * The last thread's last unlock will be for
+ * thread 0's mutex. However, thread 0 will
+ * have already exited the loop and the mutex
+ * will already be unlocked.
+ *
+ * Because this is not an ERRORCHECK mutex,
+ * that inconsistency will be silently ignored.
+ */
+ pthread_mutex_unlock(&finfo->next->mutex);
+ }
+
+ free(xbuf);
+ /*
+ * Return this thread's finfo, which is
+ * a unique value for this thread.
+ */
+ return finfo;
+}
+
+static int create_threads(int num, struct futex_info *finfo)
+{
+ int i;
+
+ for (i = 0; i < num; i++) {
+ int next_nr;
+
+ finfo[i].nr = i;
+ /*
+ * Thread 'i' will wait on this mutex to
+ * be unlocked. Lock it immediately after
+ * initialization:
+ */
+ pthread_mutex_init(&finfo[i].mutex, NULL);
+ pthread_mutex_lock(&finfo[i].mutex);
+
+ next_nr = (i + 1) % num;
+ finfo[i].next = &finfo[next_nr];
+
+ if (pthread_create(&finfo[i].thread, NULL, check_tiledata, &finfo[i]))
+ fatal_error("pthread_create()");
+ }
+ return 0;
+}
+
+static void affinitize_cpu0(void)
+{
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ fatal_error("sched_setaffinity to CPU 0");
+}
+
+static void test_context_switch(void)
+{
+ struct futex_info *finfo;
+ int i;
+
+ /* Affinitize to one CPU to force context switches */
+ affinitize_cpu0();
+
+ req_xtiledata_perm();
+
+ printf("[RUN]\tCheck tiledata context switches, %d iterations, %d threads.\n",
+ ctxtswtest_config.iterations,
+ ctxtswtest_config.num_threads);
+
+
+ finfo = malloc(sizeof(*finfo) * ctxtswtest_config.num_threads);
+ if (!finfo)
+ fatal_error("malloc()");
+
+ create_threads(ctxtswtest_config.num_threads, finfo);
+
+ /*
+ * This thread wakes up thread 0
+ * Thread 0 will wake up 1
+ * Thread 1 will wake up 2
+ * ...
+ * the last thread will wake up 0
+ *
+ * ... this will repeat for the configured
+ * number of iterations.
+ */
+ pthread_mutex_unlock(&finfo[0].mutex);
+
+ /* Wait for all the threads to finish: */
+ for (i = 0; i < ctxtswtest_config.num_threads; i++) {
+ void *thread_retval;
+ int rc;
+
+ rc = pthread_join(finfo[i].thread, &thread_retval);
+
+ if (rc)
+ fatal_error("pthread_join() failed for thread %d err: %d\n",
+ i, rc);
+
+ if (thread_retval != &finfo[i])
+ fatal_error("unexpected thread retval for thread %d: %p\n",
+ i, thread_retval);
+
+ }
+
+ printf("[OK]\tNo incorrect case was found.\n");
+
+ free(finfo);
+}
+
+int main(void)
+{
+ /* Check hardware availability at first */
+ check_cpuid_xsave();
+ check_cpuid_xtiledata();
+
+ init_stashed_xsave();
+ sethandler(SIGILL, handle_noperm, 0);
+
+ test_dynamic_state();
+
+ /* Request permission for the following tests */
+ req_xtiledata_perm();
+
+ test_fork();
+
+ ctxtswtest_config.iterations = 10;
+ ctxtswtest_config.num_threads = 5;
+ test_context_switch();
+
+ clearhandler(SIGILL);
+ free_stashed_xsave();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh
index 3e2089c8cf54..8c669c0d662e 100755
--- a/tools/testing/selftests/x86/check_cc.sh
+++ b/tools/testing/selftests/x86/check_cc.sh
@@ -7,7 +7,7 @@ CC="$1"
TESTPROG="$2"
shift 2
-if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
+if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
echo 1
else
echo 0
diff --git a/tools/testing/selftests/x86/corrupt_xstate_header.c b/tools/testing/selftests/x86/corrupt_xstate_header.c
new file mode 100644
index 000000000000..cf9ce8fbb656
--- /dev/null
+++ b/tools/testing/selftests/x86/corrupt_xstate_header.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Corrupt the XSTATE header in a signal frame
+ *
+ * Based on analysis and a test case from Thomas Gleixner.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <err.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h" /* For __cpuid_count() */
+
+static inline int xsave_enabled(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ __cpuid_count(0x1, 0x0, eax, ebx, ecx, edx);
+
+ /* Is CR4.OSXSAVE enabled ? */
+ return ecx & (1U << 27);
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void sigusr1(int sig, siginfo_t *info, void *uc_void)
+{
+ ucontext_t *uc = uc_void;
+ uint8_t *fpstate = (uint8_t *)uc->uc_mcontext.fpregs;
+ uint64_t *xfeatures = (uint64_t *)(fpstate + 512);
+
+ printf("\tWreck XSTATE header\n");
+ /* Wreck the first reserved bytes in the header */
+ *(xfeatures + 2) = 0xfffffff;
+}
+
+static void sigsegv(int sig, siginfo_t *info, void *uc_void)
+{
+ printf("\tGot SIGSEGV\n");
+}
+
+int main(void)
+{
+ cpu_set_t set;
+
+ sethandler(SIGUSR1, sigusr1, 0);
+ sethandler(SIGSEGV, sigsegv, 0);
+
+ if (!xsave_enabled()) {
+ printf("[SKIP] CR4.OSXSAVE disabled.\n");
+ return 0;
+ }
+
+ CPU_ZERO(&set);
+ CPU_SET(0, &set);
+
+ /*
+ * Enforce that the child runs on the same CPU
+ * which in turn forces a schedule.
+ */
+ sched_setaffinity(getpid(), sizeof(set), &set);
+
+ printf("[RUN]\tSend ourselves a signal\n");
+ raise(SIGUSR1);
+
+ printf("[OK]\tBack from the signal. Now schedule.\n");
+ pid_t child = fork();
+ if (child < 0)
+ err(1, "fork");
+ if (child == 0)
+ return 0;
+ if (child)
+ waitpid(child, NULL, 0);
+ printf("[OK]\tBack in the main thread.\n");
+
+ /*
+ * We could try to confirm that extended state is still preserved
+ * when we schedule. For now, the only indication of failure is
+ * a warning in the kernel logs.
+ */
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 15a329da59fa..8c780cce941d 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -285,7 +285,8 @@ static unsigned short load_gs(void)
/* 32-bit set_thread_area */
long ret;
asm volatile ("int $0x80"
- : "=a" (ret) : "a" (243), "b" (low_desc)
+ : "=a" (ret), "+m" (*low_desc)
+ : "a" (243), "b" (low_desc)
: "r8", "r9", "r10", "r11");
memcpy(&desc, low_desc, sizeof(desc));
munmap(low_desc, sizeof(desc));
@@ -391,8 +392,8 @@ static void set_gs_and_switch_to(unsigned long local,
local = read_base(GS);
/*
- * Signal delivery seems to mess up weird selectors. Put it
- * back.
+ * Signal delivery is quite likely to change a selector
+ * of 1, 2, or 3 back to 0 due to IRET being defective.
*/
asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
} else {
@@ -410,6 +411,14 @@ static void set_gs_and_switch_to(unsigned long local,
if (base == local && sel_pre_sched == sel_post_sched) {
printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n",
sel_pre_sched, local);
+ } else if (base == local && sel_pre_sched >= 1 && sel_pre_sched <= 3 &&
+ sel_post_sched == 0) {
+ /*
+ * IRET is misdesigned and will squash selectors 1, 2, or 3
+ * to zero. Don't fail the test just because this happened.
+ */
+ printf("[OK]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx because IRET is defective\n",
+ sel_pre_sched, local, sel_post_sched, base);
} else {
nerrs++;
printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n",
@@ -442,6 +451,68 @@ static void test_unexpected_base(void)
#define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
+static void test_ptrace_write_gs_read_base(void)
+{
+ int status;
+ pid_t child = fork();
+
+ if (child < 0)
+ err(1, "fork");
+
+ if (child == 0) {
+ printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n");
+
+ printf("[RUN]\tARCH_SET_GS to 1\n");
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0)
+ err(1, "ARCH_SET_GS");
+
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ raise(SIGTRAP);
+ _exit(0);
+ }
+
+ wait(&status);
+
+ if (WSTOPSIG(status) == SIGTRAP) {
+ unsigned long base;
+ unsigned long gs_offset = USER_REGS_OFFSET(gs);
+ unsigned long base_offset = USER_REGS_OFFSET(gs_base);
+
+ /* Read the initial base. It should be 1. */
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+ if (base == 1) {
+ printf("[OK]\tGSBASE started at 1\n");
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE started at 0x%lx\n", base);
+ }
+
+ printf("[RUN]\tSet GS = 0x7, read GSBASE\n");
+
+ /* Poke an LDT selector into GS. */
+ if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0)
+ err(1, "PTRACE_POKEUSER");
+
+ /* And read the base. */
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+
+ if (base == 0 || base == 1) {
+ printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base);
+ }
+ }
+
+ ptrace(PTRACE_CONT, child, NULL, NULL);
+
+ wait(&status);
+ if (!WIFEXITED(status))
+ printf("[WARN]\tChild didn't exit cleanly.\n");
+}
+
static void test_ptrace_write_gsbase(void)
{
int status;
@@ -489,16 +560,36 @@ static void test_ptrace_write_gsbase(void)
* selector value is changed or not by the GSBASE write in
* a ptracer.
*/
- if (gs == 0 && base == 0xFF) {
- printf("[OK]\tGS was reset as expected\n");
- } else {
+ if (gs != *shared_scratch) {
nerrs++;
- printf("[FAIL]\tGS=0x%lx, GSBASE=0x%lx (should be 0, 0xFF)\n", gs, base);
+ printf("[FAIL]\tGS changed to %lx\n", gs);
+
+ /*
+ * On older kernels, poking a nonzero value into the
+ * base would zero the selector. On newer kernels,
+ * this behavior has changed -- poking the base
+ * changes only the base and, if FSGSBASE is not
+ * available, this may have no effect once the tracee
+ * is resumed.
+ */
+ if (gs == 0)
+ printf("\tNote: this is expected behavior on older kernels.\n");
+ } else if (have_fsgsbase && (base != 0xFF)) {
+ nerrs++;
+ printf("[FAIL]\tGSBASE changed to %lx\n", base);
+ } else {
+ printf("[OK]\tGS remained 0x%hx", *shared_scratch);
+ if (have_fsgsbase)
+ printf(" and GSBASE changed to 0xFF");
+ printf("\n");
}
}
END:
ptrace(PTRACE_CONT, child, NULL, NULL);
+ wait(&status);
+ if (!WIFEXITED(status))
+ printf("[WARN]\tChild didn't exit cleanly.\n");
}
int main()
@@ -508,6 +599,9 @@ int main()
shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+ /* Do these tests before we have an LDT. */
+ test_ptrace_write_gs_read_base();
+
/* Probe FSGSBASE */
sethandler(SIGILL, sigill, 0);
if (sigsetjmp(jmpbuf, 1) == 0) {
diff --git a/tools/testing/selftests/x86/fsgsbase_restore.c b/tools/testing/selftests/x86/fsgsbase_restore.c
new file mode 100644
index 000000000000..6fffadc51579
--- /dev/null
+++ b/tools/testing/selftests/x86/fsgsbase_restore.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * fsgsbase_restore.c, test ptrace vs fsgsbase
+ * Copyright (c) 2020 Andy Lutomirski
+ *
+ * This test case simulates a tracer redirecting tracee execution to
+ * a function and then restoring tracee state using PTRACE_GETREGS and
+ * PTRACE_SETREGS. This is similar to what gdb does when doing
+ * 'p func()'. The catch is that this test has the called function
+ * modify a segment register. This makes sure that ptrace correctly
+ * restores segment state when using PTRACE_SETREGS.
+ *
+ * This is not part of fsgsbase.c, because that test is 64-bit only.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <err.h>
+#include <sys/user.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <asm/ldt.h>
+#include <sys/mman.h>
+#include <stddef.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+#include <stdint.h>
+
+#define EXPECTED_VALUE 0x1337f00d
+
+#ifdef __x86_64__
+# define SEG "%gs"
+#else
+# define SEG "%fs"
+#endif
+
+static unsigned int dereference_seg_base(void)
+{
+ int ret;
+ asm volatile ("mov %" SEG ":(0), %0" : "=rm" (ret));
+ return ret;
+}
+
+static void init_seg(void)
+{
+ unsigned int *target = mmap(
+ NULL, sizeof(unsigned int),
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+ if (target == MAP_FAILED)
+ err(1, "mmap");
+
+ *target = EXPECTED_VALUE;
+
+ printf("\tsegment base address = 0x%lx\n", (unsigned long)target);
+
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = (unsigned int)(uintptr_t)target,
+ .limit = sizeof(unsigned int) - 1,
+ .seg_32bit = 1,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
+ printf("\tusing LDT slot 0\n");
+ asm volatile ("mov %0, %" SEG :: "rm" ((unsigned short)0x7));
+ } else {
+ /* No modify_ldt for us (configured out, perhaps) */
+
+ struct user_desc *low_desc = mmap(
+ NULL, sizeof(desc),
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+ memcpy(low_desc, &desc, sizeof(desc));
+
+ low_desc->entry_number = -1;
+
+ /* 32-bit set_thread_area */
+ long ret;
+ asm volatile ("int $0x80"
+ : "=a" (ret), "+m" (*low_desc)
+ : "a" (243), "b" (low_desc)
+#ifdef __x86_64__
+ : "r8", "r9", "r10", "r11"
+#endif
+ );
+ memcpy(&desc, low_desc, sizeof(desc));
+ munmap(low_desc, sizeof(desc));
+
+ if (ret != 0) {
+ printf("[NOTE]\tcould not create a segment -- can't test anything\n");
+ exit(0);
+ }
+ printf("\tusing GDT slot %d\n", desc.entry_number);
+
+ unsigned short sel = (unsigned short)((desc.entry_number << 3) | 0x3);
+ asm volatile ("mov %0, %" SEG :: "rm" (sel));
+ }
+}
+
+static void tracee_zap_segment(void)
+{
+ /*
+ * The tracer will redirect execution here. This is meant to
+ * work like gdb's 'p func()' feature. The tricky bit is that
+ * we modify a segment register in order to make sure that ptrace
+ * can correctly restore segment registers.
+ */
+ printf("\tTracee: in tracee_zap_segment()\n");
+
+ /*
+ * Write a nonzero selector with base zero to the segment register.
+ * Using a null selector would defeat the test on AMD pre-Zen2
+ * CPUs, as such CPUs don't clear the base when loading a null
+ * selector.
+ */
+ unsigned short sel;
+ asm volatile ("mov %%ss, %0\n\t"
+ "mov %0, %" SEG
+ : "=rm" (sel));
+
+ pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+ printf("\tTracee is going back to sleep\n");
+ syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+ /* Should not get here. */
+ while (true) {
+ printf("[FAIL]\tTracee hit unreachable code\n");
+ pause();
+ }
+}
+
+int main()
+{
+ printf("\tSetting up a segment\n");
+ init_seg();
+
+ unsigned int val = dereference_seg_base();
+ if (val != EXPECTED_VALUE) {
+ printf("[FAIL]\tseg[0] == %x; should be %x\n", val, EXPECTED_VALUE);
+ return 1;
+ }
+ printf("[OK]\tThe segment points to the right place.\n");
+
+ pid_t chld = fork();
+ if (chld < 0)
+ err(1, "fork");
+
+ if (chld == 0) {
+ prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0, 0);
+
+ if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+ printf("\tTracee will take a nap until signaled\n");
+ syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+ printf("\tTracee was resumed. Will re-check segment.\n");
+
+ val = dereference_seg_base();
+ if (val != EXPECTED_VALUE) {
+ printf("[FAIL]\tseg[0] == %x; should be %x\n", val, EXPECTED_VALUE);
+ exit(1);
+ }
+
+ printf("[OK]\tThe segment points to the right place.\n");
+ exit(0);
+ }
+
+ int status;
+
+ /* Wait for SIGSTOP. */
+ if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+ err(1, "waitpid");
+
+ struct user_regs_struct regs;
+
+ if (ptrace(PTRACE_GETREGS, chld, NULL, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+#ifdef __x86_64__
+ printf("\tChild GS=0x%lx, GSBASE=0x%lx\n", (unsigned long)regs.gs, (unsigned long)regs.gs_base);
+#else
+ printf("\tChild FS=0x%lx\n", (unsigned long)regs.xfs);
+#endif
+
+ struct user_regs_struct regs2 = regs;
+#ifdef __x86_64__
+ regs2.rip = (unsigned long)tracee_zap_segment;
+ regs2.rsp -= 128; /* Don't clobber the redzone. */
+#else
+ regs2.eip = (unsigned long)tracee_zap_segment;
+#endif
+
+ printf("\tTracer: redirecting tracee to tracee_zap_segment()\n");
+ if (ptrace(PTRACE_SETREGS, chld, NULL, &regs2) != 0)
+ err(1, "PTRACE_GETREGS");
+ if (ptrace(PTRACE_CONT, chld, NULL, NULL) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ /* Wait for SIGSTOP. */
+ if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+ err(1, "waitpid");
+
+ printf("\tTracer: restoring tracee state\n");
+ if (ptrace(PTRACE_SETREGS, chld, NULL, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+ if (ptrace(PTRACE_DETACH, chld, NULL, NULL) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ /* Wait for SIGSTOP. */
+ if (waitpid(chld, &status, 0) != chld)
+ err(1, "waitpid");
+
+ if (WIFSIGNALED(status)) {
+ printf("[FAIL]\tTracee crashed\n");
+ return 1;
+ }
+
+ if (!WIFEXITED(status)) {
+ printf("[FAIL]\tTracee stopped for an unexpected reason: %d\n", status);
+ return 1;
+ }
+
+ int exitcode = WEXITSTATUS(status);
+ if (exitcode != 0) {
+ printf("[FAIL]\tTracee reported failure\n");
+ return 1;
+ }
+
+ printf("[OK]\tAll is well.\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/helpers.h b/tools/testing/selftests/x86/helpers.h
new file mode 100644
index 000000000000..4ef42c4559a9
--- /dev/null
+++ b/tools/testing/selftests/x86/helpers.h
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef __SELFTESTS_X86_HELPERS_H
+#define __SELFTESTS_X86_HELPERS_H
+
+#include <asm/processor-flags.h>
+
+static inline unsigned long get_eflags(void)
+{
+#ifdef __x86_64__
+ return __builtin_ia32_readeflags_u64();
+#else
+ return __builtin_ia32_readeflags_u32();
+#endif
+}
+
+static inline void set_eflags(unsigned long eflags)
+{
+#ifdef __x86_64__
+ __builtin_ia32_writeeflags_u64(eflags);
+#else
+ __builtin_ia32_writeeflags_u32(eflags);
+#endif
+}
+
+#endif /* __SELFTESTS_X86_HELPERS_H */
diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c
index bab2f6e06b63..7e3e09c1abac 100644
--- a/tools/testing/selftests/x86/iopl.c
+++ b/tools/testing/selftests/x86/iopl.c
@@ -85,48 +85,88 @@ static void expect_gp_outb(unsigned short port)
printf("[OK]\toutb to 0x%02hx failed\n", port);
}
-static bool try_cli(void)
+#define RET_FAULTED 0
+#define RET_FAIL 1
+#define RET_EMUL 2
+
+static int try_cli(void)
{
+ unsigned long flags;
+
sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
if (sigsetjmp(jmpbuf, 1) != 0) {
- return false;
+ return RET_FAULTED;
} else {
- asm volatile ("cli");
- return true;
+ asm volatile("cli; pushf; pop %[flags]"
+ : [flags] "=rm" (flags));
+
+ /* X86_FLAGS_IF */
+ if (!(flags & (1 << 9)))
+ return RET_FAIL;
+ else
+ return RET_EMUL;
}
clearhandler(SIGSEGV);
}
-static bool try_sti(void)
+static int try_sti(bool irqs_off)
{
+ unsigned long flags;
+
sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
if (sigsetjmp(jmpbuf, 1) != 0) {
- return false;
+ return RET_FAULTED;
} else {
- asm volatile ("sti");
- return true;
+ asm volatile("sti; pushf; pop %[flags]"
+ : [flags] "=rm" (flags));
+
+ /* X86_FLAGS_IF */
+ if (irqs_off && (flags & (1 << 9)))
+ return RET_FAIL;
+ else
+ return RET_EMUL;
}
clearhandler(SIGSEGV);
}
-static void expect_gp_sti(void)
+static void expect_gp_sti(bool irqs_off)
{
- if (try_sti()) {
+ int ret = try_sti(irqs_off);
+
+ switch (ret) {
+ case RET_FAULTED:
+ printf("[OK]\tSTI faulted\n");
+ break;
+ case RET_EMUL:
+ printf("[OK]\tSTI NOPped\n");
+ break;
+ default:
printf("[FAIL]\tSTI worked\n");
nerrs++;
- } else {
- printf("[OK]\tSTI faulted\n");
}
}
-static void expect_gp_cli(void)
+/*
+ * Returns whether it managed to disable interrupts.
+ */
+static bool test_cli(void)
{
- if (try_cli()) {
+ int ret = try_cli();
+
+ switch (ret) {
+ case RET_FAULTED:
+ printf("[OK]\tCLI faulted\n");
+ break;
+ case RET_EMUL:
+ printf("[OK]\tCLI NOPped\n");
+ break;
+ default:
printf("[FAIL]\tCLI worked\n");
nerrs++;
- } else {
- printf("[OK]\tCLI faulted\n");
+ return true;
}
+
+ return false;
}
int main(void)
@@ -152,8 +192,7 @@ int main(void)
}
/* Make sure that CLI/STI are blocked even with IOPL level 3 */
- expect_gp_cli();
- expect_gp_sti();
+ expect_gp_sti(test_cli());
expect_ok_outb(0x80);
/* Establish an I/O bitmap to test the restore */
@@ -204,8 +243,7 @@ int main(void)
printf("[RUN]\tparent: write to 0x80 (should fail)\n");
expect_gp_outb(0x80);
- expect_gp_cli();
- expect_gp_sti();
+ expect_gp_sti(test_cli());
/* Test the capability checks. */
printf("\tiopl(3)\n");
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 1aef72df20a1..3a29346e1452 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -607,7 +607,7 @@ static void do_multicpu_tests(void)
failures++;
asm volatile ("mov %0, %%ss" : : "rm" (orig_ss));
- };
+ }
ftx = 100; /* Kill the thread. */
syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
index 6da0ac3f0135..cc3de6ff9fba 100644
--- a/tools/testing/selftests/x86/mov_ss_trap.c
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -47,7 +47,6 @@
unsigned short ss;
extern unsigned char breakpoint_insn[];
sigjmp_buf jmpbuf;
-static unsigned char altstack_data[SIGSTKSZ];
static void enable_watchpoint(void)
{
@@ -250,13 +249,14 @@ int main()
if (sigsetjmp(jmpbuf, 1) == 0) {
printf("[RUN]\tMOV SS; SYSENTER\n");
stack_t stack = {
- .ss_sp = altstack_data,
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
err(1, "sigaltstack");
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
nr = SYS_getpid;
+ free(stack.ss_sp);
/* Clear EBP first to make sure we segfault cleanly. */
asm volatile ("xorl %%ebp, %%ebp; mov %[ss], %%ss; SYSENTER" : "+a" (nr)
: [ss] "m" (ss) : "flags", "rcx"
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
deleted file mode 100644
index 254e5436bdd9..000000000000
--- a/tools/testing/selftests/x86/pkey-helpers.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _PKEYS_HELPER_H
-#define _PKEYS_HELPER_H
-#define _GNU_SOURCE
-#include <string.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <ucontext.h>
-#include <sys/mman.h>
-
-#define NR_PKEYS 16
-#define PKRU_BITS_PER_PKEY 2
-
-#ifndef DEBUG_LEVEL
-#define DEBUG_LEVEL 0
-#endif
-#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
-extern int dprint_in_signal;
-extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-static inline void sigsafe_printf(const char *format, ...)
-{
- va_list ap;
-
- if (!dprint_in_signal) {
- va_start(ap, format);
- vprintf(format, ap);
- va_end(ap);
- } else {
- int ret;
- /*
- * No printf() functions are signal-safe.
- * They deadlock easily. Write the format
- * string to get some output, even if
- * incomplete.
- */
- ret = write(1, format, strlen(format));
- if (ret < 0)
- exit(1);
- }
-}
-#define dprintf_level(level, args...) do { \
- if (level <= DEBUG_LEVEL) \
- sigsafe_printf(args); \
-} while (0)
-#define dprintf0(args...) dprintf_level(0, args)
-#define dprintf1(args...) dprintf_level(1, args)
-#define dprintf2(args...) dprintf_level(2, args)
-#define dprintf3(args...) dprintf_level(3, args)
-#define dprintf4(args...) dprintf_level(4, args)
-
-extern unsigned int shadow_pkru;
-static inline unsigned int __rdpkru(void)
-{
- unsigned int eax, edx;
- unsigned int ecx = 0;
- unsigned int pkru;
-
- asm volatile(".byte 0x0f,0x01,0xee\n\t"
- : "=a" (eax), "=d" (edx)
- : "c" (ecx));
- pkru = eax;
- return pkru;
-}
-
-static inline unsigned int _rdpkru(int line)
-{
- unsigned int pkru = __rdpkru();
-
- dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
- line, pkru, shadow_pkru);
- assert(pkru == shadow_pkru);
-
- return pkru;
-}
-
-#define rdpkru() _rdpkru(__LINE__)
-
-static inline void __wrpkru(unsigned int pkru)
-{
- unsigned int eax = pkru;
- unsigned int ecx = 0;
- unsigned int edx = 0;
-
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
- asm volatile(".byte 0x0f,0x01,0xef\n\t"
- : : "a" (eax), "c" (ecx), "d" (edx));
- assert(pkru == __rdpkru());
-}
-
-static inline void wrpkru(unsigned int pkru)
-{
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
- /* will do the shadow check for us: */
- rdpkru();
- __wrpkru(pkru);
- shadow_pkru = pkru;
- dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
-}
-
-/*
- * These are technically racy. since something could
- * change PKRU between the read and the write.
- */
-static inline void __pkey_access_allow(int pkey, int do_allow)
-{
- unsigned int pkru = rdpkru();
- int bit = pkey * 2;
-
- if (do_allow)
- pkru &= (1<<bit);
- else
- pkru |= (1<<bit);
-
- dprintf4("pkru now: %08x\n", rdpkru());
- wrpkru(pkru);
-}
-
-static inline void __pkey_write_allow(int pkey, int do_allow_write)
-{
- long pkru = rdpkru();
- int bit = pkey * 2 + 1;
-
- if (do_allow_write)
- pkru &= (1<<bit);
- else
- pkru |= (1<<bit);
-
- wrpkru(pkru);
- dprintf4("pkru now: %08x\n", rdpkru());
-}
-
-#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
-#define PROT_PKEY1 0x20 /* protection key value (bit 1) */
-#define PROT_PKEY2 0x40 /* protection key value (bit 2) */
-#define PROT_PKEY3 0x80 /* protection key value (bit 3) */
-
-#define PAGE_SIZE 4096
-#define MB (1<<20)
-
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "cpuid;"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
-#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
-
-static inline int cpu_has_pku(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
-
- eax = 0x7;
- ecx = 0x0;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (!(ecx & X86_FEATURE_PKU)) {
- dprintf2("cpu does not have PKU\n");
- return 0;
- }
- if (!(ecx & X86_FEATURE_OSPKE)) {
- dprintf2("cpu does not have OSPKE\n");
- return 0;
- }
- return 1;
-}
-
-#define XSTATE_PKRU_BIT (9)
-#define XSTATE_PKRU 0x200
-
-int pkru_xstate_offset(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- int xstate_offset;
- int xstate_size;
- unsigned long XSTATE_CPUID = 0xd;
- int leaf;
-
- /* assume that XSTATE_PKRU is set in XCR0 */
- leaf = XSTATE_PKRU_BIT;
- {
- eax = XSTATE_CPUID;
- ecx = leaf;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (leaf == XSTATE_PKRU_BIT) {
- xstate_offset = ebx;
- xstate_size = eax;
- }
- }
-
- if (xstate_size == 0) {
- printf("could not find size/offset of PKRU in xsave state\n");
- return 0;
- }
-
- return xstate_offset;
-}
-
-#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
deleted file mode 100644
index 480995bceefa..000000000000
--- a/tools/testing/selftests/x86/protection_keys.c
+++ /dev/null
@@ -1,1506 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Tests x86 Memory Protection Keys (see Documentation/core-api/protection-keys.rst)
- *
- * There are examples in here of:
- * * how to set protection keys on memory
- * * how to set/clear bits in PKRU (the rights register)
- * * how to handle SEGV_PKRU signals and extract pkey-relevant
- * information from the siginfo
- *
- * Things to add:
- * make sure KSM and KSM COW breaking works
- * prefault pages in at malloc, or not
- * protect MPX bounds tables with protection keys?
- * make sure VMA splitting/merging is working correctly
- * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
- * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
- * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
- *
- * Compile like this:
- * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- */
-#define _GNU_SOURCE
-#include <errno.h>
-#include <linux/futex.h>
-#include <sys/time.h>
-#include <sys/syscall.h>
-#include <string.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <ucontext.h>
-#include <sys/mman.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <sys/ptrace.h>
-#include <setjmp.h>
-
-#include "pkey-helpers.h"
-
-int iteration_nr = 1;
-int test_nr;
-
-unsigned int shadow_pkru;
-
-#define HPAGE_SIZE (1UL<<21)
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
-#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
-#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
-#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
-#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
-#define __stringify_1(x...) #x
-#define __stringify(x...) __stringify_1(x)
-
-#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
-
-int dprint_in_signal;
-char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-
-extern void abort_hooks(void);
-#define pkey_assert(condition) do { \
- if (!(condition)) { \
- dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
- __FILE__, __LINE__, \
- test_nr, iteration_nr); \
- dprintf0("errno at assert: %d", errno); \
- abort_hooks(); \
- exit(__LINE__); \
- } \
-} while (0)
-
-void cat_into_file(char *str, char *file)
-{
- int fd = open(file, O_RDWR);
- int ret;
-
- dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
- /*
- * these need to be raw because they are called under
- * pkey_assert()
- */
- if (fd < 0) {
- fprintf(stderr, "error opening '%s'\n", str);
- perror("error: ");
- exit(__LINE__);
- }
-
- ret = write(fd, str, strlen(str));
- if (ret != strlen(str)) {
- perror("write to file failed");
- fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
- exit(__LINE__);
- }
- close(fd);
-}
-
-#if CONTROL_TRACING > 0
-static int warned_tracing;
-int tracing_root_ok(void)
-{
- if (geteuid() != 0) {
- if (!warned_tracing)
- fprintf(stderr, "WARNING: not run as root, "
- "can not do tracing control\n");
- warned_tracing = 1;
- return 0;
- }
- return 1;
-}
-#endif
-
-void tracing_on(void)
-{
-#if CONTROL_TRACING > 0
-#define TRACEDIR "/sys/kernel/debug/tracing"
- char pidstr[32];
-
- if (!tracing_root_ok())
- return;
-
- sprintf(pidstr, "%d", getpid());
- cat_into_file("0", TRACEDIR "/tracing_on");
- cat_into_file("\n", TRACEDIR "/trace");
- if (1) {
- cat_into_file("function_graph", TRACEDIR "/current_tracer");
- cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
- } else {
- cat_into_file("nop", TRACEDIR "/current_tracer");
- }
- cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid");
- cat_into_file("1", TRACEDIR "/tracing_on");
- dprintf1("enabled tracing\n");
-#endif
-}
-
-void tracing_off(void)
-{
-#if CONTROL_TRACING > 0
- if (!tracing_root_ok())
- return;
- cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
-#endif
-}
-
-void abort_hooks(void)
-{
- fprintf(stderr, "running %s()...\n", __func__);
- tracing_off();
-#ifdef SLEEP_ON_ABORT
- sleep(SLEEP_ON_ABORT);
-#endif
-}
-
-static inline void __page_o_noops(void)
-{
- /* 8-bytes of instruction * 512 bytes = 1 page */
- asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
-}
-
-/*
- * This attempts to have roughly a page of instructions followed by a few
- * instructions that do a write, and another page of instructions. That
- * way, we are pretty sure that the write is in the second page of
- * instructions and has at least a page of padding behind it.
- *
- * *That* lets us be sure to madvise() away the write instruction, which
- * will then fault, which makes sure that the fault code handles
- * execute-only memory properly.
- */
-__attribute__((__aligned__(PAGE_SIZE)))
-void lots_o_noops_around_write(int *write_to_me)
-{
- dprintf3("running %s()\n", __func__);
- __page_o_noops();
- /* Assume this happens in the second page of instructions: */
- *write_to_me = __LINE__;
- /* pad out by another page: */
- __page_o_noops();
- dprintf3("%s() done\n", __func__);
-}
-
-/* Define some kernel-like types */
-#define u8 uint8_t
-#define u16 uint16_t
-#define u32 uint32_t
-#define u64 uint64_t
-
-#ifdef __i386__
-
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 381
-# define SYS_pkey_free 382
-#endif
-
-#define REG_IP_IDX REG_EIP
-#define si_pkey_offset 0x14
-
-#else
-
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 330
-# define SYS_pkey_free 331
-#endif
-
-#define REG_IP_IDX REG_RIP
-#define si_pkey_offset 0x20
-
-#endif
-
-void dump_mem(void *dumpme, int len_bytes)
-{
- char *c = (void *)dumpme;
- int i;
-
- for (i = 0; i < len_bytes; i += sizeof(u64)) {
- u64 *ptr = (u64 *)(c + i);
- dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
- }
-}
-
-/* Failed address bound checks: */
-#ifndef SEGV_BNDERR
-# define SEGV_BNDERR 3
-#endif
-
-#ifndef SEGV_PKUERR
-# define SEGV_PKUERR 4
-#endif
-
-static char *si_code_str(int si_code)
-{
- if (si_code == SEGV_MAPERR)
- return "SEGV_MAPERR";
- if (si_code == SEGV_ACCERR)
- return "SEGV_ACCERR";
- if (si_code == SEGV_BNDERR)
- return "SEGV_BNDERR";
- if (si_code == SEGV_PKUERR)
- return "SEGV_PKUERR";
- return "UNKNOWN";
-}
-
-int pkru_faults;
-int last_si_pkey = -1;
-void signal_handler(int signum, siginfo_t *si, void *vucontext)
-{
- ucontext_t *uctxt = vucontext;
- int trapno;
- unsigned long ip;
- char *fpregs;
- u32 *pkru_ptr;
- u64 siginfo_pkey;
- u32 *si_pkey_ptr;
- int pkru_offset;
- fpregset_t fpregset;
-
- dprint_in_signal = 1;
- dprintf1(">>>>===============SIGSEGV============================\n");
- dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
- __rdpkru(), shadow_pkru);
-
- trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
- ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
- fpregset = uctxt->uc_mcontext.fpregs;
- fpregs = (void *)fpregset;
-
- dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
- trapno, ip, si_code_str(si->si_code), si->si_code);
-#ifdef __i386__
- /*
- * 32-bit has some extra padding so that userspace can tell whether
- * the XSTATE header is present in addition to the "legacy" FPU
- * state. We just assume that it is here.
- */
- fpregs += 0x70;
-#endif
- pkru_offset = pkru_xstate_offset();
- pkru_ptr = (void *)(&fpregs[pkru_offset]);
-
- dprintf1("siginfo: %p\n", si);
- dprintf1(" fpregs: %p\n", fpregs);
- /*
- * If we got a PKRU fault, we *HAVE* to have at least one bit set in
- * here.
- */
- dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
- if (DEBUG_LEVEL > 4)
- dump_mem(pkru_ptr - 128, 256);
- pkey_assert(*pkru_ptr);
-
- if ((si->si_code == SEGV_MAPERR) ||
- (si->si_code == SEGV_ACCERR) ||
- (si->si_code == SEGV_BNDERR)) {
- printf("non-PK si_code, exiting...\n");
- exit(4);
- }
-
- si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
- dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
- dump_mem((u8 *)si_pkey_ptr - 8, 24);
- siginfo_pkey = *si_pkey_ptr;
- pkey_assert(siginfo_pkey < NR_PKEYS);
- last_si_pkey = siginfo_pkey;
-
- dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
- /* need __rdpkru() version so we do not do shadow_pkru checking */
- dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
- dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
- *(u64 *)pkru_ptr = 0x00000000;
- dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
- pkru_faults++;
- dprintf1("<<<<==================================================\n");
- dprint_in_signal = 0;
-}
-
-int wait_all_children(void)
-{
- int status;
- return waitpid(-1, &status, 0);
-}
-
-void sig_chld(int x)
-{
- dprint_in_signal = 1;
- dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
- dprint_in_signal = 0;
-}
-
-void setup_sigsegv_handler(void)
-{
- int r, rs;
- struct sigaction newact;
- struct sigaction oldact;
-
- /* #PF is mapped to sigsegv */
- int signum = SIGSEGV;
-
- newact.sa_handler = 0;
- newact.sa_sigaction = signal_handler;
-
- /*sigset_t - signals to block while in the handler */
- /* get the old signal mask. */
- rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
- pkey_assert(rs == 0);
-
- /* call sa_sigaction, not sa_handler*/
- newact.sa_flags = SA_SIGINFO;
-
- newact.sa_restorer = 0; /* void(*)(), obsolete */
- r = sigaction(signum, &newact, &oldact);
- r = sigaction(SIGALRM, &newact, &oldact);
- pkey_assert(r == 0);
-}
-
-void setup_handlers(void)
-{
- signal(SIGCHLD, &sig_chld);
- setup_sigsegv_handler();
-}
-
-pid_t fork_lazy_child(void)
-{
- pid_t forkret;
-
- forkret = fork();
- pkey_assert(forkret >= 0);
- dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
-
- if (!forkret) {
- /* in the child */
- while (1) {
- dprintf1("child sleeping...\n");
- sleep(30);
- }
- }
- return forkret;
-}
-
-#ifndef PKEY_DISABLE_ACCESS
-# define PKEY_DISABLE_ACCESS 0x1
-#endif
-
-#ifndef PKEY_DISABLE_WRITE
-# define PKEY_DISABLE_WRITE 0x2
-#endif
-
-static u32 hw_pkey_get(int pkey, unsigned long flags)
-{
- u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 pkru = __rdpkru();
- u32 shifted_pkru;
- u32 masked_pkru;
-
- dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
- __func__, pkey, flags, 0, 0);
- dprintf2("%s() raw pkru: %x\n", __func__, pkru);
-
- shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
- dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
- masked_pkru = shifted_pkru & mask;
- dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru);
- /*
- * shift down the relevant bits to the lowest two, then
- * mask off all the other high bits.
- */
- return masked_pkru;
-}
-
-static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
-{
- u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 old_pkru = __rdpkru();
- u32 new_pkru;
-
- /* make sure that 'rights' only contains the bits we expect: */
- assert(!(rights & ~mask));
-
- /* copy old pkru */
- new_pkru = old_pkru;
- /* mask out bits from pkey in old value: */
- new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
- /* OR in new bits for pkey: */
- new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
-
- __wrpkru(new_pkru);
-
- dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
- __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
- return 0;
-}
-
-void pkey_disable_set(int pkey, int flags)
-{
- unsigned long syscall_flags = 0;
- int ret;
- int pkey_rights;
- u32 orig_pkru = rdpkru();
-
- dprintf1("START->%s(%d, 0x%x)\n", __func__,
- pkey, flags);
- pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
-
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
- pkey_assert(pkey_rights >= 0);
-
- pkey_rights |= flags;
-
- ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
- assert(!ret);
- /*pkru and flags have the same format */
- shadow_pkru |= flags << (pkey * 2);
- dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
-
- pkey_assert(ret >= 0);
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
-
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
- if (flags)
- pkey_assert(rdpkru() > orig_pkru);
- dprintf1("END<---%s(%d, 0x%x)\n", __func__,
- pkey, flags);
-}
-
-void pkey_disable_clear(int pkey, int flags)
-{
- unsigned long syscall_flags = 0;
- int ret;
- int pkey_rights = hw_pkey_get(pkey, syscall_flags);
- u32 orig_pkru = rdpkru();
-
- pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
-
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
- pkey_assert(pkey_rights >= 0);
-
- pkey_rights |= flags;
-
- ret = hw_pkey_set(pkey, pkey_rights, 0);
- /* pkru and flags have the same format */
- shadow_pkru &= ~(flags << (pkey * 2));
- pkey_assert(ret >= 0);
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
-
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
- if (flags)
- assert(rdpkru() > orig_pkru);
-}
-
-void pkey_write_allow(int pkey)
-{
- pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
-}
-void pkey_write_deny(int pkey)
-{
- pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
-}
-void pkey_access_allow(int pkey)
-{
- pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
-}
-void pkey_access_deny(int pkey)
-{
- pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
-}
-
-int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
- unsigned long pkey)
-{
- int sret;
-
- dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
- ptr, size, orig_prot, pkey);
-
- errno = 0;
- sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
- if (errno) {
- dprintf2("SYS_mprotect_key sret: %d\n", sret);
- dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
- dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
- if (DEBUG_LEVEL >= 2)
- perror("SYS_mprotect_pkey");
- }
- return sret;
-}
-
-int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
-{
- int ret = syscall(SYS_pkey_alloc, flags, init_val);
- dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
- __func__, flags, init_val, ret, errno);
- return ret;
-}
-
-int alloc_pkey(void)
-{
- int ret;
- unsigned long init_val = 0x0;
-
- dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
- __LINE__, __rdpkru(), shadow_pkru);
- ret = sys_pkey_alloc(0, init_val);
- /*
- * pkey_alloc() sets PKRU, so we need to reflect it in
- * shadow_pkru:
- */
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- if (ret) {
- /* clear both the bits: */
- shadow_pkru &= ~(0x3 << (ret * 2));
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- /*
- * move the new state in from init_val
- * (remember, we cheated and init_val == pkru format)
- */
- shadow_pkru |= (init_val << (ret * 2));
- }
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
- /* for shadow checking: */
- rdpkru();
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- return ret;
-}
-
-int sys_pkey_free(unsigned long pkey)
-{
- int ret = syscall(SYS_pkey_free, pkey);
- dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
- return ret;
-}
-
-/*
- * I had a bug where pkey bits could be set by mprotect() but
- * not cleared. This ensures we get lots of random bit sets
- * and clears on the vma and pte pkey bits.
- */
-int alloc_random_pkey(void)
-{
- int max_nr_pkey_allocs;
- int ret;
- int i;
- int alloced_pkeys[NR_PKEYS];
- int nr_alloced = 0;
- int random_index;
- memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
-
- /* allocate every possible key and make a note of which ones we got */
- max_nr_pkey_allocs = NR_PKEYS;
- max_nr_pkey_allocs = 1;
- for (i = 0; i < max_nr_pkey_allocs; i++) {
- int new_pkey = alloc_pkey();
- if (new_pkey < 0)
- break;
- alloced_pkeys[nr_alloced++] = new_pkey;
- }
-
- pkey_assert(nr_alloced > 0);
- /* select a random one out of the allocated ones */
- random_index = rand() % nr_alloced;
- ret = alloced_pkeys[random_index];
- /* now zero it out so we don't free it next */
- alloced_pkeys[random_index] = 0;
-
- /* go through the allocated ones that we did not want and free them */
- for (i = 0; i < nr_alloced; i++) {
- int free_ret;
- if (!alloced_pkeys[i])
- continue;
- free_ret = sys_pkey_free(alloced_pkeys[i]);
- pkey_assert(!free_ret);
- }
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- return ret;
-}
-
-int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
- unsigned long pkey)
-{
- int nr_iterations = random() % 100;
- int ret;
-
- while (0) {
- int rpkey = alloc_random_pkey();
- ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
- dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
- ptr, size, orig_prot, pkey, ret);
- if (nr_iterations-- < 0)
- break;
-
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- sys_pkey_free(rpkey);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- }
- pkey_assert(pkey < NR_PKEYS);
-
- ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
- dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
- ptr, size, orig_prot, pkey, ret);
- pkey_assert(!ret);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
- return ret;
-}
-
-struct pkey_malloc_record {
- void *ptr;
- long size;
- int prot;
-};
-struct pkey_malloc_record *pkey_malloc_records;
-struct pkey_malloc_record *pkey_last_malloc_record;
-long nr_pkey_malloc_records;
-void record_pkey_malloc(void *ptr, long size, int prot)
-{
- long i;
- struct pkey_malloc_record *rec = NULL;
-
- for (i = 0; i < nr_pkey_malloc_records; i++) {
- rec = &pkey_malloc_records[i];
- /* find a free record */
- if (rec)
- break;
- }
- if (!rec) {
- /* every record is full */
- size_t old_nr_records = nr_pkey_malloc_records;
- size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
- size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
- dprintf2("new_nr_records: %zd\n", new_nr_records);
- dprintf2("new_size: %zd\n", new_size);
- pkey_malloc_records = realloc(pkey_malloc_records, new_size);
- pkey_assert(pkey_malloc_records != NULL);
- rec = &pkey_malloc_records[nr_pkey_malloc_records];
- /*
- * realloc() does not initialize memory, so zero it from
- * the first new record all the way to the end.
- */
- for (i = 0; i < new_nr_records - old_nr_records; i++)
- memset(rec + i, 0, sizeof(*rec));
- }
- dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
- (int)(rec - pkey_malloc_records), rec, ptr, size);
- rec->ptr = ptr;
- rec->size = size;
- rec->prot = prot;
- pkey_last_malloc_record = rec;
- nr_pkey_malloc_records++;
-}
-
-void free_pkey_malloc(void *ptr)
-{
- long i;
- int ret;
- dprintf3("%s(%p)\n", __func__, ptr);
- for (i = 0; i < nr_pkey_malloc_records; i++) {
- struct pkey_malloc_record *rec = &pkey_malloc_records[i];
- dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
- ptr, i, rec, rec->ptr, rec->size);
- if ((ptr < rec->ptr) ||
- (ptr >= rec->ptr + rec->size))
- continue;
-
- dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
- ptr, i, rec, rec->ptr, rec->size);
- nr_pkey_malloc_records--;
- ret = munmap(rec->ptr, rec->size);
- dprintf3("munmap ret: %d\n", ret);
- pkey_assert(!ret);
- dprintf3("clearing rec->ptr, rec: %p\n", rec);
- rec->ptr = NULL;
- dprintf3("done clearing rec->ptr, rec: %p\n", rec);
- return;
- }
- pkey_assert(false);
-}
-
-
-void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
-{
- void *ptr;
- int ret;
-
- rdpkru();
- dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
- size, prot, pkey);
- pkey_assert(pkey < NR_PKEYS);
- ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- pkey_assert(ptr != (void *)-1);
- ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
- pkey_assert(!ret);
- record_pkey_malloc(ptr, size, prot);
- rdpkru();
-
- dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
- return ptr;
-}
-
-void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
-{
- int ret;
- void *ptr;
-
- dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
- size, prot, pkey);
- /*
- * Guarantee we can fit at least one huge page in the resulting
- * allocation by allocating space for 2:
- */
- size = ALIGN_UP(size, HPAGE_SIZE * 2);
- ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- pkey_assert(ptr != (void *)-1);
- record_pkey_malloc(ptr, size, prot);
- mprotect_pkey(ptr, size, prot, pkey);
-
- dprintf1("unaligned ptr: %p\n", ptr);
- ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE);
- dprintf1(" aligned ptr: %p\n", ptr);
- ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE);
- dprintf1("MADV_HUGEPAGE ret: %d\n", ret);
- ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED);
- dprintf1("MADV_WILLNEED ret: %d\n", ret);
- memset(ptr, 0, HPAGE_SIZE);
-
- dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr);
- return ptr;
-}
-
-int hugetlb_setup_ok;
-#define GET_NR_HUGE_PAGES 10
-void setup_hugetlbfs(void)
-{
- int err;
- int fd;
- char buf[] = "123";
-
- if (geteuid() != 0) {
- fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
- return;
- }
-
- cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
-
- /*
- * Now go make sure that we got the pages and that they
- * are 2M pages. Someone might have made 1G the default.
- */
- fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
- if (fd < 0) {
- perror("opening sysfs 2M hugetlb config");
- return;
- }
-
- /* -1 to guarantee leaving the trailing \0 */
- err = read(fd, buf, sizeof(buf)-1);
- close(fd);
- if (err <= 0) {
- perror("reading sysfs 2M hugetlb config");
- return;
- }
-
- if (atoi(buf) != GET_NR_HUGE_PAGES) {
- fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
- buf, GET_NR_HUGE_PAGES);
- return;
- }
-
- hugetlb_setup_ok = 1;
-}
-
-void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
-{
- void *ptr;
- int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
-
- if (!hugetlb_setup_ok)
- return PTR_ERR_ENOTSUP;
-
- dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey);
- size = ALIGN_UP(size, HPAGE_SIZE * 2);
- pkey_assert(pkey < NR_PKEYS);
- ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
- pkey_assert(ptr != (void *)-1);
- mprotect_pkey(ptr, size, prot, pkey);
-
- record_pkey_malloc(ptr, size, prot);
-
- dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
- return ptr;
-}
-
-void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
-{
- void *ptr;
- int fd;
-
- dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
- size, prot, pkey);
- pkey_assert(pkey < NR_PKEYS);
- fd = open("/dax/foo", O_RDWR);
- pkey_assert(fd >= 0);
-
- ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
- pkey_assert(ptr != (void *)-1);
-
- mprotect_pkey(ptr, size, prot, pkey);
-
- record_pkey_malloc(ptr, size, prot);
-
- dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
- close(fd);
- return ptr;
-}
-
-void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
-
- malloc_pkey_with_mprotect,
- malloc_pkey_anon_huge,
- malloc_pkey_hugetlb
-/* can not do direct with the pkey_mprotect() API:
- malloc_pkey_mmap_direct,
- malloc_pkey_mmap_dax,
-*/
-};
-
-void *malloc_pkey(long size, int prot, u16 pkey)
-{
- void *ret;
- static int malloc_type;
- int nr_malloc_types = ARRAY_SIZE(pkey_malloc);
-
- pkey_assert(pkey < NR_PKEYS);
-
- while (1) {
- pkey_assert(malloc_type < nr_malloc_types);
-
- ret = pkey_malloc[malloc_type](size, prot, pkey);
- pkey_assert(ret != (void *)-1);
-
- malloc_type++;
- if (malloc_type >= nr_malloc_types)
- malloc_type = (random()%nr_malloc_types);
-
- /* try again if the malloc_type we tried is unsupported */
- if (ret == PTR_ERR_ENOTSUP)
- continue;
-
- break;
- }
-
- dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__,
- size, prot, pkey, ret);
- return ret;
-}
-
-int last_pkru_faults;
-#define UNKNOWN_PKEY -2
-void expected_pk_fault(int pkey)
-{
- dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
- __func__, last_pkru_faults, pkru_faults);
- dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
- pkey_assert(last_pkru_faults + 1 == pkru_faults);
-
- /*
- * For exec-only memory, we do not know the pkey in
- * advance, so skip this check.
- */
- if (pkey != UNKNOWN_PKEY)
- pkey_assert(last_si_pkey == pkey);
-
- /*
- * The signal handler shold have cleared out PKRU to let the
- * test program continue. We now have to restore it.
- */
- if (__rdpkru() != 0)
- pkey_assert(0);
-
- __wrpkru(shadow_pkru);
- dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
- __func__, shadow_pkru);
- last_pkru_faults = pkru_faults;
- last_si_pkey = -1;
-}
-
-#define do_not_expect_pk_fault(msg) do { \
- if (last_pkru_faults != pkru_faults) \
- dprintf0("unexpected PK fault: %s\n", msg); \
- pkey_assert(last_pkru_faults == pkru_faults); \
-} while (0)
-
-int test_fds[10] = { -1 };
-int nr_test_fds;
-void __save_test_fd(int fd)
-{
- pkey_assert(fd >= 0);
- pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
- test_fds[nr_test_fds] = fd;
- nr_test_fds++;
-}
-
-int get_test_read_fd(void)
-{
- int test_fd = open("/etc/passwd", O_RDONLY);
- __save_test_fd(test_fd);
- return test_fd;
-}
-
-void close_test_fds(void)
-{
- int i;
-
- for (i = 0; i < nr_test_fds; i++) {
- if (test_fds[i] < 0)
- continue;
- close(test_fds[i]);
- test_fds[i] = -1;
- }
- nr_test_fds = 0;
-}
-
-#define barrier() __asm__ __volatile__("": : :"memory")
-__attribute__((noinline)) int read_ptr(int *ptr)
-{
- /*
- * Keep GCC from optimizing this away somehow
- */
- barrier();
- return *ptr;
-}
-
-void test_read_of_write_disabled_region(int *ptr, u16 pkey)
-{
- int ptr_contents;
-
- dprintf1("disabling write access to PKEY[1], doing read\n");
- pkey_write_deny(pkey);
- ptr_contents = read_ptr(ptr);
- dprintf1("*ptr: %d\n", ptr_contents);
- dprintf1("\n");
-}
-void test_read_of_access_disabled_region(int *ptr, u16 pkey)
-{
- int ptr_contents;
-
- dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
- rdpkru();
- pkey_access_deny(pkey);
- ptr_contents = read_ptr(ptr);
- dprintf1("*ptr: %d\n", ptr_contents);
- expected_pk_fault(pkey);
-}
-void test_write_of_write_disabled_region(int *ptr, u16 pkey)
-{
- dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
- pkey_write_deny(pkey);
- *ptr = __LINE__;
- expected_pk_fault(pkey);
-}
-void test_write_of_access_disabled_region(int *ptr, u16 pkey)
-{
- dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
- pkey_access_deny(pkey);
- *ptr = __LINE__;
- expected_pk_fault(pkey);
-}
-void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
-{
- int ret;
- int test_fd = get_test_read_fd();
-
- dprintf1("disabling access to PKEY[%02d], "
- "having kernel read() to buffer\n", pkey);
- pkey_access_deny(pkey);
- ret = read(test_fd, ptr, 1);
- dprintf1("read ret: %d\n", ret);
- pkey_assert(ret);
-}
-void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
-{
- int ret;
- int test_fd = get_test_read_fd();
-
- pkey_write_deny(pkey);
- ret = read(test_fd, ptr, 100);
- dprintf1("read ret: %d\n", ret);
- if (ret < 0 && (DEBUG_LEVEL > 0))
- perror("verbose read result (OK for this to be bad)");
- pkey_assert(ret);
-}
-
-void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
-{
- int pipe_ret, vmsplice_ret;
- struct iovec iov;
- int pipe_fds[2];
-
- pipe_ret = pipe(pipe_fds);
-
- pkey_assert(pipe_ret == 0);
- dprintf1("disabling access to PKEY[%02d], "
- "having kernel vmsplice from buffer\n", pkey);
- pkey_access_deny(pkey);
- iov.iov_base = ptr;
- iov.iov_len = PAGE_SIZE;
- vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT);
- dprintf1("vmsplice() ret: %d\n", vmsplice_ret);
- pkey_assert(vmsplice_ret == -1);
-
- close(pipe_fds[0]);
- close(pipe_fds[1]);
-}
-
-void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
-{
- int ignored = 0xdada;
- int futex_ret;
- int some_int = __LINE__;
-
- dprintf1("disabling write to PKEY[%02d], "
- "doing futex gunk in buffer\n", pkey);
- *ptr = some_int;
- pkey_write_deny(pkey);
- futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL,
- &ignored, ignored);
- if (DEBUG_LEVEL > 0)
- perror("futex");
- dprintf1("futex() ret: %d\n", futex_ret);
-}
-
-/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
-{
- int err;
- int i;
-
- /* Note: 0 is the default pkey, so don't mess with it */
- for (i = 1; i < NR_PKEYS; i++) {
- if (pkey == i)
- continue;
-
- dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i);
- err = sys_pkey_free(i);
- pkey_assert(err);
-
- err = sys_pkey_free(i);
- pkey_assert(err);
-
- err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i);
- pkey_assert(err);
- }
-}
-
-/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
-{
- int err;
- int bad_pkey = NR_PKEYS+99;
-
- /* pass a known-invalid pkey in: */
- err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
- pkey_assert(err);
-}
-
-void become_child(void)
-{
- pid_t forkret;
-
- forkret = fork();
- pkey_assert(forkret >= 0);
- dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
-
- if (!forkret) {
- /* in the child */
- return;
- }
- exit(0);
-}
-
-/* Assumes that all pkeys other than 'pkey' are unallocated */
-void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
-{
- int err;
- int allocated_pkeys[NR_PKEYS] = {0};
- int nr_allocated_pkeys = 0;
- int i;
-
- for (i = 0; i < NR_PKEYS*3; i++) {
- int new_pkey;
- dprintf1("%s() alloc loop: %d\n", __func__, i);
- new_pkey = alloc_pkey();
- dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, err, __rdpkru(), shadow_pkru);
- rdpkru(); /* for shadow checking */
- dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
- if ((new_pkey == -1) && (errno == ENOSPC)) {
- dprintf2("%s() failed to allocate pkey after %d tries\n",
- __func__, nr_allocated_pkeys);
- } else {
- /*
- * Ensure the number of successes never
- * exceeds the number of keys supported
- * in the hardware.
- */
- pkey_assert(nr_allocated_pkeys < NR_PKEYS);
- allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
- }
-
- /*
- * Make sure that allocation state is properly
- * preserved across fork().
- */
- if (i == NR_PKEYS*2)
- become_child();
- }
-
- dprintf3("%s()::%d\n", __func__, __LINE__);
-
- /*
- * There are 16 pkeys supported in hardware. Three are
- * allocated by the time we get here:
- * 1. The default key (0)
- * 2. One possibly consumed by an execute-only mapping.
- * 3. One allocated by the test code and passed in via
- * 'pkey' to this function.
- * Ensure that we can allocate at least another 13 (16-3).
- */
- pkey_assert(i >= NR_PKEYS-3);
-
- for (i = 0; i < nr_allocated_pkeys; i++) {
- err = sys_pkey_free(allocated_pkeys[i]);
- pkey_assert(!err);
- rdpkru(); /* for shadow checking */
- }
-}
-
-/*
- * pkey 0 is special. It is allocated by default, so you do not
- * have to call pkey_alloc() to use it first. Make sure that it
- * is usable.
- */
-void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
-{
- long size;
- int prot;
-
- assert(pkey_last_malloc_record);
- size = pkey_last_malloc_record->size;
- /*
- * This is a bit of a hack. But mprotect() requires
- * huge-page-aligned sizes when operating on hugetlbfs.
- * So, make sure that we use something that's a multiple
- * of a huge page when we can.
- */
- if (size >= HPAGE_SIZE)
- size = HPAGE_SIZE;
- prot = pkey_last_malloc_record->prot;
-
- /* Use pkey 0 */
- mprotect_pkey(ptr, size, prot, 0);
-
- /* Make sure that we can set it back to the original pkey. */
- mprotect_pkey(ptr, size, prot, pkey);
-}
-
-void test_ptrace_of_child(int *ptr, u16 pkey)
-{
- __attribute__((__unused__)) int peek_result;
- pid_t child_pid;
- void *ignored = 0;
- long ret;
- int status;
- /*
- * This is the "control" for our little expermient. Make sure
- * we can always access it when ptracing.
- */
- int *plain_ptr_unaligned = malloc(HPAGE_SIZE);
- int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
-
- /*
- * Fork a child which is an exact copy of this process, of course.
- * That means we can do all of our tests via ptrace() and then plain
- * memory access and ensure they work differently.
- */
- child_pid = fork_lazy_child();
- dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
-
- ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored);
- if (ret)
- perror("attach");
- dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
- pkey_assert(ret != -1);
- ret = waitpid(child_pid, &status, WUNTRACED);
- if ((ret != child_pid) || !(WIFSTOPPED(status))) {
- fprintf(stderr, "weird waitpid result %ld stat %x\n",
- ret, status);
- pkey_assert(0);
- }
- dprintf2("waitpid ret: %ld\n", ret);
- dprintf2("waitpid status: %d\n", status);
-
- pkey_access_deny(pkey);
- pkey_write_deny(pkey);
-
- /* Write access, untested for now:
- ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data);
- pkey_assert(ret != -1);
- dprintf1("poke at %p: %ld\n", peek_at, ret);
- */
-
- /*
- * Try to access the pkey-protected "ptr" via ptrace:
- */
- ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored);
- /* expect it to work, without an error: */
- pkey_assert(ret != -1);
- /* Now access from the current task, and expect an exception: */
- peek_result = read_ptr(ptr);
- expected_pk_fault(pkey);
-
- /*
- * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
- */
- ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored);
- /* expect it to work, without an error: */
- pkey_assert(ret != -1);
- /* Now access from the current task, and expect NO exception: */
- peek_result = read_ptr(plain_ptr);
- do_not_expect_pk_fault("read plain pointer after ptrace");
-
- ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
- pkey_assert(ret != -1);
-
- ret = kill(child_pid, SIGKILL);
- pkey_assert(ret != -1);
-
- wait(&status);
-
- free(plain_ptr_unaligned);
-}
-
-void *get_pointer_to_instructions(void)
-{
- void *p1;
-
- p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
- dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
- /* lots_o_noops_around_write should be page-aligned already */
- assert(p1 == &lots_o_noops_around_write);
-
- /* Point 'p1' at the *second* page of the function: */
- p1 += PAGE_SIZE;
-
- /*
- * Try to ensure we fault this in on next touch to ensure
- * we get an instruction fault as opposed to a data one
- */
- madvise(p1, PAGE_SIZE, MADV_DONTNEED);
-
- return p1;
-}
-
-void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
-{
- void *p1;
- int scratch;
- int ptr_contents;
- int ret;
-
- p1 = get_pointer_to_instructions();
- lots_o_noops_around_write(&scratch);
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
-
- ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
- pkey_assert(!ret);
- pkey_access_deny(pkey);
-
- dprintf2("pkru: %x\n", rdpkru());
-
- /*
- * Make sure this is an *instruction* fault
- */
- madvise(p1, PAGE_SIZE, MADV_DONTNEED);
- lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(pkey);
-}
-
-void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
-{
- void *p1;
- int scratch;
- int ptr_contents;
- int ret;
-
- dprintf1("%s() start\n", __func__);
-
- p1 = get_pointer_to_instructions();
- lots_o_noops_around_write(&scratch);
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
-
- /* Use a *normal* mprotect(), not mprotect_pkey(): */
- ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
- pkey_assert(!ret);
-
- dprintf2("pkru: %x\n", rdpkru());
-
- /* Make sure this is an *instruction* fault */
- madvise(p1, PAGE_SIZE, MADV_DONTNEED);
- lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(UNKNOWN_PKEY);
-
- /*
- * Put the memory back to non-PROT_EXEC. Should clear the
- * exec-only pkey off the VMA and allow it to be readable
- * again. Go to PROT_NONE first to check for a kernel bug
- * that did not clear the pkey when doing PROT_NONE.
- */
- ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
- pkey_assert(!ret);
-
- ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
- pkey_assert(!ret);
- ptr_contents = read_ptr(p1);
- do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
-}
-
-void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
-{
- int size = PAGE_SIZE;
- int sret;
-
- if (cpu_has_pku()) {
- dprintf1("SKIP: %s: no CPU support\n", __func__);
- return;
- }
-
- sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
- pkey_assert(sret < 0);
-}
-
-void (*pkey_tests[])(int *ptr, u16 pkey) = {
- test_read_of_write_disabled_region,
- test_read_of_access_disabled_region,
- test_write_of_write_disabled_region,
- test_write_of_access_disabled_region,
- test_kernel_write_of_access_disabled_region,
- test_kernel_write_of_write_disabled_region,
- test_kernel_gup_of_access_disabled_region,
- test_kernel_gup_write_to_write_disabled_region,
- test_executing_on_unreadable_memory,
- test_implicit_mprotect_exec_only_memory,
- test_mprotect_with_pkey_0,
- test_ptrace_of_child,
- test_pkey_syscalls_on_non_allocated_pkey,
- test_pkey_syscalls_bad_args,
- test_pkey_alloc_exhaust,
-};
-
-void run_tests_once(void)
-{
- int *ptr;
- int prot = PROT_READ|PROT_WRITE;
-
- for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
- int pkey;
- int orig_pkru_faults = pkru_faults;
-
- dprintf1("======================\n");
- dprintf1("test %d preparing...\n", test_nr);
-
- tracing_on();
- pkey = alloc_random_pkey();
- dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
- ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
- dprintf1("test %d starting...\n", test_nr);
- pkey_tests[test_nr](ptr, pkey);
- dprintf1("freeing test memory: %p\n", ptr);
- free_pkey_malloc(ptr);
- sys_pkey_free(pkey);
-
- dprintf1("pkru_faults: %d\n", pkru_faults);
- dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
-
- tracing_off();
- close_test_fds();
-
- printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
- dprintf1("======================\n\n");
- }
- iteration_nr++;
-}
-
-void pkey_setup_shadow(void)
-{
- shadow_pkru = __rdpkru();
-}
-
-int main(void)
-{
- int nr_iterations = 22;
-
- setup_handlers();
-
- printf("has pku: %d\n", cpu_has_pku());
-
- if (!cpu_has_pku()) {
- int size = PAGE_SIZE;
- int *ptr;
-
- printf("running PKEY tests for unsupported CPU/OS\n");
-
- ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
- assert(ptr != (void *)-1);
- test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
- exit(0);
- }
-
- pkey_setup_shadow();
- printf("startup pkru: %x\n", rdpkru());
- setup_hugetlbfs();
-
- while (nr_iterations-- > 0)
- run_tests_once();
-
- printf("done (all tests OK)\n");
- return 0;
-}
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index 6f22238f3217..12aaa063196e 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -414,8 +414,12 @@ int main()
#if defined(__i386__) && (!defined(__GLIBC__) || __GLIBC__ > 2 || __GLIBC_MINOR__ >= 16)
vsyscall32 = (void *)getauxval(AT_SYSINFO);
- printf("[RUN]\tCheck AT_SYSINFO return regs\n");
- test_sys32_regs(do_full_vsyscall32);
+ if (vsyscall32) {
+ printf("[RUN]\tCheck AT_SYSINFO return regs\n");
+ test_sys32_regs(do_full_vsyscall32);
+ } else {
+ printf("[SKIP]\tAT_SYSINFO is not available\n");
+ }
#endif
test_ptrace_syscall_restart();
diff --git a/tools/testing/selftests/x86/raw_syscall_helper_32.S b/tools/testing/selftests/x86/raw_syscall_helper_32.S
index 94410fa2b5ed..a10d36afdca0 100644
--- a/tools/testing/selftests/x86/raw_syscall_helper_32.S
+++ b/tools/testing/selftests/x86/raw_syscall_helper_32.S
@@ -45,3 +45,5 @@ int80_and_ret:
.type int80_and_ret, @function
.size int80_and_ret, .-int80_and_ret
+
+.section .note.GNU-stack,"",%progbits
diff --git a/tools/testing/selftests/x86/sigaltstack.c b/tools/testing/selftests/x86/sigaltstack.c
new file mode 100644
index 000000000000..f689af75e979
--- /dev/null
+++ b/tools/testing/selftests/x86/sigaltstack.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define _GNU_SOURCE
+#include <signal.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/mman.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <setjmp.h>
+
+/* sigaltstack()-enforced minimum stack */
+#define ENFORCED_MINSIGSTKSZ 2048
+
+#ifndef AT_MINSIGSTKSZ
+# define AT_MINSIGSTKSZ 51
+#endif
+
+static int nerrs;
+
+static bool sigalrm_expected;
+
+static unsigned long at_minstack_size;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static int setup_altstack(void *start, unsigned long size)
+{
+ stack_t ss;
+
+ memset(&ss, 0, sizeof(ss));
+ ss.ss_size = size;
+ ss.ss_sp = start;
+
+ return sigaltstack(&ss, NULL);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ if (sigalrm_expected) {
+ printf("[FAIL]\tWrong signal delivered: SIGSEGV (expected SIGALRM).");
+ nerrs++;
+ } else {
+ printf("[OK]\tSIGSEGV signal delivered.\n");
+ }
+
+ siglongjmp(jmpbuf, 1);
+}
+
+static void sigalrm(int sig, siginfo_t *info, void *ctx_void)
+{
+ if (!sigalrm_expected) {
+ printf("[FAIL]\tWrong signal delivered: SIGALRM (expected SIGSEGV).");
+ nerrs++;
+ } else {
+ printf("[OK]\tSIGALRM signal delivered.\n");
+ }
+}
+
+static void test_sigaltstack(void *altstack, unsigned long size)
+{
+ if (setup_altstack(altstack, size))
+ err(1, "sigaltstack()");
+
+ sigalrm_expected = (size > at_minstack_size) ? true : false;
+
+ sethandler(SIGSEGV, sigsegv, 0);
+ sethandler(SIGALRM, sigalrm, SA_ONSTACK);
+
+ if (!sigsetjmp(jmpbuf, 1)) {
+ printf("[RUN]\tTest an alternate signal stack of %ssufficient size.\n",
+ sigalrm_expected ? "" : "in");
+ printf("\tRaise SIGALRM. %s is expected to be delivered.\n",
+ sigalrm_expected ? "It" : "SIGSEGV");
+ raise(SIGALRM);
+ }
+
+ clearhandler(SIGALRM);
+ clearhandler(SIGSEGV);
+}
+
+int main(void)
+{
+ void *altstack;
+
+ at_minstack_size = getauxval(AT_MINSIGSTKSZ);
+
+ altstack = mmap(NULL, at_minstack_size + SIGSTKSZ, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (altstack == MAP_FAILED)
+ err(1, "mmap()");
+
+ if ((ENFORCED_MINSIGSTKSZ + 1) < at_minstack_size)
+ test_sigaltstack(altstack, ENFORCED_MINSIGSTKSZ + 1);
+
+ test_sigaltstack(altstack, at_minstack_size + SIGSTKSZ);
+
+ return nerrs == 0 ? 0 : 1;
+}
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 57c4f67f16ef..5d7961a5f7f6 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -138,9 +138,6 @@ static unsigned short LDT3(int idx)
return (idx << 3) | 7;
}
-/* Our sigaltstack scratch space. */
-static char altstack_data[SIGSTKSZ];
-
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -771,7 +768,8 @@ int main()
setup_ldt();
stack_t stack = {
- .ss_sp = altstack_data,
+ /* Our sigaltstack scratch space. */
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
@@ -872,5 +870,6 @@ int main()
total_nerrs += test_nonstrict_ss();
#endif
+ free(stack.ss_sp);
return total_nerrs ? 1 : 0;
}
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 1063328e275c..9a30f443e928 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -31,6 +31,8 @@
#include <sys/ptrace.h>
#include <sys/user.h>
+#include "helpers.h"
+
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -55,7 +57,6 @@ static void clearhandler(int sig)
static volatile sig_atomic_t sig_traps, sig_eflags;
sigjmp_buf jmpbuf;
-static unsigned char altstack_data[SIGSTKSZ];
#ifdef __x86_64__
# define REG_IP REG_RIP
@@ -67,21 +68,6 @@ static unsigned char altstack_data[SIGSTKSZ];
# define INT80_CLOBBERS
#endif
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
-#define X86_EFLAGS_TF (1UL << 8)
-
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t*)ctx_void;
@@ -223,7 +209,7 @@ int main()
unsigned long nr = SYS_getpid;
printf("[RUN]\tSet TF and check SYSENTER\n");
stack_t stack = {
- .ss_sp = altstack_data,
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
@@ -232,6 +218,7 @@ int main()
SA_RESETHAND | SA_ONSTACK);
sethandler(SIGILL, print_and_longjmp, SA_RESETHAND);
set_eflags(get_eflags() | X86_EFLAGS_TF);
+ free(stack.ss_sp);
/* Clear EBP first to make sure we segfault cleanly. */
asm volatile ("xorl %%ebp, %%ebp; SYSENTER" : "+a" (nr) :: "flags", "rcx"
#ifdef __x86_64__
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index bc0ecc2e862e..461fa41a4d02 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -15,29 +15,7 @@
#include <setjmp.h>
#include <errno.h>
-#ifdef __x86_64__
-# define WIDTH "q"
-#else
-# define WIDTH "l"
-#endif
-
-/* Our sigaltstack scratch space. */
-static unsigned char altstack_data[SIGSTKSZ];
-
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
-#define X86_EFLAGS_TF (1UL << 8)
+#include "helpers.h"
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
@@ -72,6 +50,7 @@ static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
if (ax != -EFAULT && ax != -ENOSYS) {
printf("[FAIL]\tAX had the wrong value: 0x%lx\n",
(unsigned long)ax);
+ printf("\tIP = 0x%lx\n", (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
n_errs++;
} else {
printf("[OK]\tSeems okay\n");
@@ -122,7 +101,8 @@ static void sigill(int sig, siginfo_t *info, void *ctx_void)
int main()
{
stack_t stack = {
- .ss_sp = altstack_data,
+ /* Our sigaltstack scratch space. */
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
@@ -226,5 +206,31 @@ int main()
}
set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+#ifdef __x86_64__
+ printf("[RUN]\tSYSENTER with TF, invalid state, and GSBASE < 0\n");
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ sigtrap_consecutive_syscalls = 0;
+
+ asm volatile ("wrgsbase %%rax\n\t"
+ :: "a" (0xffffffffffff0000UL));
+
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ asm volatile (
+ "movl $-1, %%eax\n\t"
+ "movl $-1, %%ebx\n\t"
+ "movl $-1, %%ecx\n\t"
+ "movl $-1, %%edx\n\t"
+ "movl $-1, %%esi\n\t"
+ "movl $-1, %%edi\n\t"
+ "movl $-1, %%ebp\n\t"
+ "movl $-1, %%esp\n\t"
+ "sysenter"
+ : : : "memory", "flags");
+ }
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+#endif
+
+ free(stack.ss_sp);
return 0;
}
diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c
index 02309a195041..a108b80dd082 100644
--- a/tools/testing/selftests/x86/syscall_nt.c
+++ b/tools/testing/selftests/x86/syscall_nt.c
@@ -13,29 +13,11 @@
#include <signal.h>
#include <err.h>
#include <sys/syscall.h>
-#include <asm/processor-flags.h>
-#ifdef __x86_64__
-# define WIDTH "q"
-#else
-# define WIDTH "l"
-#endif
+#include "helpers.h"
static unsigned int nerrs;
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -59,6 +41,7 @@ static void do_it(unsigned long extraflags)
set_eflags(get_eflags() | extraflags);
syscall(SYS_getpid);
flags = get_eflags();
+ set_eflags(X86_EFLAGS_IF | X86_EFLAGS_FIXED);
if ((flags & extraflags) == extraflags) {
printf("[OK]\tThe syscall worked and flags are still set\n");
} else {
@@ -73,6 +56,12 @@ int main(void)
printf("[RUN]\tSet NT and issue a syscall\n");
do_it(X86_EFLAGS_NT);
+ printf("[RUN]\tSet AC and issue a syscall\n");
+ do_it(X86_EFLAGS_AC);
+
+ printf("[RUN]\tSet NT|AC and issue a syscall\n");
+ do_it(X86_EFLAGS_NT | X86_EFLAGS_AC);
+
/*
* Now try it again with TF set -- TF forces returns via IRET in all
* cases except non-ptregs-using 64-bit full fast path syscalls.
@@ -80,8 +69,28 @@ int main(void)
sethandler(SIGTRAP, sigtrap, 0);
+ printf("[RUN]\tSet TF and issue a syscall\n");
+ do_it(X86_EFLAGS_TF);
+
printf("[RUN]\tSet NT|TF and issue a syscall\n");
do_it(X86_EFLAGS_NT | X86_EFLAGS_TF);
+ printf("[RUN]\tSet AC|TF and issue a syscall\n");
+ do_it(X86_EFLAGS_AC | X86_EFLAGS_TF);
+
+ printf("[RUN]\tSet NT|AC|TF and issue a syscall\n");
+ do_it(X86_EFLAGS_NT | X86_EFLAGS_AC | X86_EFLAGS_TF);
+
+ /*
+ * Now try DF. This is evil and it's plausible that we will crash
+ * glibc, but glibc would have to do something rather surprising
+ * for this to happen.
+ */
+ printf("[RUN]\tSet DF and issue a syscall\n");
+ do_it(X86_EFLAGS_DF);
+
+ printf("[RUN]\tSet TF|DF and issue a syscall\n");
+ do_it(X86_EFLAGS_TF | X86_EFLAGS_DF);
+
return nerrs == 0 ? 0 : 1;
}
diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
index d6b09cb1aa2c..991591718bb0 100644
--- a/tools/testing/selftests/x86/syscall_numbering.c
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -1,6 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
+ * syscall_numbering.c - test calling the x86-64 kernel with various
+ * valid and invalid system call numbers.
+ *
* Copyright (c) 2018 Andrew Lutomirski
*/
@@ -11,79 +13,470 @@
#include <stdbool.h>
#include <errno.h>
#include <unistd.h>
-#include <syscall.h>
+#include <string.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <sysexits.h>
-static int nerrs;
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
-#define X32_BIT 0x40000000UL
+#include <linux/ptrace.h>
-static void check_enosys(unsigned long nr, bool *ok)
+/* Common system call numbers */
+#define SYS_READ 0
+#define SYS_WRITE 1
+#define SYS_GETPID 39
+/* x64-only system call numbers */
+#define X64_IOCTL 16
+#define X64_READV 19
+#define X64_WRITEV 20
+/* x32-only system call numbers (without X32_BIT) */
+#define X32_IOCTL 514
+#define X32_READV 515
+#define X32_WRITEV 516
+
+#define X32_BIT 0x40000000
+
+static int nullfd = -1; /* File descriptor for /dev/null */
+static bool with_x32; /* x32 supported on this kernel? */
+
+enum ptrace_pass {
+ PTP_NOTHING,
+ PTP_GETREGS,
+ PTP_WRITEBACK,
+ PTP_FUZZRET,
+ PTP_FUZZHIGH,
+ PTP_INTNUM,
+ PTP_DONE
+};
+
+static const char * const ptrace_pass_name[] =
{
- /* If this fails, a segfault is reasonably likely. */
- fflush(stdout);
+ [PTP_NOTHING] = "just stop, no data read",
+ [PTP_GETREGS] = "only getregs",
+ [PTP_WRITEBACK] = "getregs, unmodified setregs",
+ [PTP_FUZZRET] = "modifying the default return",
+ [PTP_FUZZHIGH] = "clobbering the top 32 bits",
+ [PTP_INTNUM] = "sign-extending the syscall number",
+};
- long ret = syscall(nr, 0, 0, 0, 0, 0, 0);
- if (ret == 0) {
- printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr);
- *ok = false;
- } else if (errno != ENOSYS) {
- printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno);
- *ok = false;
- }
+/*
+ * Shared memory block between tracer and test
+ */
+struct shared {
+ unsigned int nerr; /* Total error count */
+ unsigned int indent; /* Message indentation level */
+ enum ptrace_pass ptrace_pass;
+ bool probing_syscall; /* In probe_syscall() */
+};
+static volatile struct shared *sh;
+
+static inline unsigned int offset(void)
+{
+ unsigned int level = sh ? sh->indent : 0;
+
+ return 8 + level * 4;
}
-static void test_x32_without_x32_bit(void)
+#define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \
+ ## __VA_ARGS__)
+
+#define run(fmt, ...) msg(RUN, fmt, ## __VA_ARGS__)
+#define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__)
+#define ok(fmt, ...) msg(OK, fmt, ## __VA_ARGS__)
+
+#define fail(fmt, ...) \
+ do { \
+ msg(FAIL, fmt, ## __VA_ARGS__); \
+ sh->nerr++; \
+ } while (0)
+
+#define crit(fmt, ...) \
+ do { \
+ sh->indent = 0; \
+ msg(FAIL, fmt, ## __VA_ARGS__); \
+ msg(SKIP, "Unable to run test\n"); \
+ exit(EX_OSERR); \
+ } while (0)
+
+/* Sentinel for ptrace-modified return value */
+#define MODIFIED_BY_PTRACE -9999
+
+/*
+ * Directly invokes the given syscall with nullfd as the first argument
+ * and the rest zero. Avoids involving glibc wrappers in case they ever
+ * end up intercepting some system calls for some reason, or modify
+ * the system call number itself.
+ */
+static long long probe_syscall(int msb, int lsb)
{
- bool ok = true;
+ register long long arg1 asm("rdi") = nullfd;
+ register long long arg2 asm("rsi") = 0;
+ register long long arg3 asm("rdx") = 0;
+ register long long arg4 asm("r10") = 0;
+ register long long arg5 asm("r8") = 0;
+ register long long arg6 asm("r9") = 0;
+ long long nr = ((long long)msb << 32) | (unsigned int)lsb;
+ long long ret;
/*
- * Syscalls 512-547 are "x32" syscalls. They are intended to be
- * called with the x32 (0x40000000) bit set. Calling them without
- * the x32 bit set is nonsense and should not work.
+ * We pass in an extra copy of the extended system call number
+ * in %rbx, so we can examine it from the ptrace handler without
+ * worrying about it being possibly modified. This is to test
+ * the validity of struct user regs.orig_rax a.k.a.
+ * struct pt_regs.orig_ax.
*/
- printf("[RUN]\tChecking syscalls 512-547\n");
- for (int i = 512; i <= 547; i++)
- check_enosys(i, &ok);
+ sh->probing_syscall = true;
+ asm volatile("syscall"
+ : "=a" (ret)
+ : "a" (nr), "b" (nr),
+ "r" (arg1), "r" (arg2), "r" (arg3),
+ "r" (arg4), "r" (arg5), "r" (arg6)
+ : "rcx", "r11", "memory", "cc");
+ sh->probing_syscall = false;
+
+ return ret;
+}
+
+static const char *syscall_str(int msb, int start, int end)
+{
+ static char buf[64];
+ const char * const type = (start & X32_BIT) ? "x32" : "x64";
+ int lsb = start;
/*
- * Check that a handful of 64-bit-only syscalls are rejected if the x32
- * bit is set.
+ * Improve readability by stripping the x32 bit, but round
+ * toward zero so we don't display -1 as -1073741825.
*/
- printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n");
- check_enosys(16 | X32_BIT, &ok); /* ioctl */
- check_enosys(19 | X32_BIT, &ok); /* readv */
- check_enosys(20 | X32_BIT, &ok); /* writev */
+ if (lsb < 0)
+ lsb |= X32_BIT;
+ else
+ lsb &= ~X32_BIT;
+
+ if (start == end)
+ snprintf(buf, sizeof buf, "%s syscall %d:%d",
+ type, msb, lsb);
+ else
+ snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d",
+ type, msb, lsb, lsb + (end-start));
+
+ return buf;
+}
+
+static unsigned int _check_for(int msb, int start, int end, long long expect,
+ const char *expect_str)
+{
+ unsigned int err = 0;
+
+ sh->indent++;
+ if (start != end)
+ sh->indent++;
+
+ for (int nr = start; nr <= end; nr++) {
+ long long ret = probe_syscall(msb, nr);
+
+ if (ret != expect) {
+ fail("%s returned %lld, but it should have returned %s\n",
+ syscall_str(msb, nr, nr),
+ ret, expect_str);
+ err++;
+ }
+ }
+
+ if (start != end)
+ sh->indent--;
+
+ if (err) {
+ if (start != end)
+ fail("%s had %u failure%s\n",
+ syscall_str(msb, start, end),
+ err, err == 1 ? "s" : "");
+ } else {
+ ok("%s returned %s as expected\n",
+ syscall_str(msb, start, end), expect_str);
+ }
+
+ sh->indent--;
+
+ return err;
+}
+
+#define check_for(msb,start,end,expect) \
+ _check_for(msb,start,end,expect,#expect)
+
+static bool check_zero(int msb, int nr)
+{
+ return check_for(msb, nr, nr, 0);
+}
+
+static bool check_enosys(int msb, int nr)
+{
+ return check_for(msb, nr, nr, -ENOSYS);
+}
+
+/*
+ * Anyone diagnosing a failure will want to know whether the kernel
+ * supports x32. Tell them. This can also be used to conditionalize
+ * tests based on existence or nonexistence of x32.
+ */
+static bool test_x32(void)
+{
+ long long ret;
+ pid_t mypid = getpid();
+
+ run("Checking for x32 by calling x32 getpid()\n");
+ ret = probe_syscall(0, SYS_GETPID | X32_BIT);
+
+ sh->indent++;
+ if (ret == mypid) {
+ info("x32 is supported\n");
+ with_x32 = true;
+ } else if (ret == -ENOSYS) {
+ info("x32 is not supported\n");
+ with_x32 = false;
+ } else {
+ fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid);
+ with_x32 = false;
+ }
+ sh->indent--;
+ return with_x32;
+}
+
+static void test_syscalls_common(int msb)
+{
+ enum ptrace_pass pass = sh->ptrace_pass;
+
+ run("Checking some common syscalls as 64 bit\n");
+ check_zero(msb, SYS_READ);
+ check_zero(msb, SYS_WRITE);
+
+ run("Checking some 64-bit only syscalls as 64 bit\n");
+ check_zero(msb, X64_READV);
+ check_zero(msb, X64_WRITEV);
+
+ run("Checking out of range system calls\n");
+ check_for(msb, -64, -2, -ENOSYS);
+ if (pass >= PTP_FUZZRET)
+ check_for(msb, -1, -1, MODIFIED_BY_PTRACE);
+ else
+ check_for(msb, -1, -1, -ENOSYS);
+ check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
+ check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
+ check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
+}
+static void test_syscalls_with_x32(int msb)
+{
/*
- * Check some syscalls with high bits set.
+ * Syscalls 512-547 are "x32" syscalls. They are
+ * intended to be called with the x32 (0x40000000) bit
+ * set. Calling them without the x32 bit set is
+ * nonsense and should not work.
*/
- printf("[RUN]\tChecking numbers above 2^32-1\n");
- check_enosys((1UL << 32), &ok);
- check_enosys(X32_BIT | (1UL << 32), &ok);
+ run("Checking x32 syscalls as 64 bit\n");
+ check_for(msb, 512, 547, -ENOSYS);
- if (!ok)
- nerrs++;
- else
- printf("[OK]\tThey all returned -ENOSYS\n");
+ run("Checking some common syscalls as x32\n");
+ check_zero(msb, SYS_READ | X32_BIT);
+ check_zero(msb, SYS_WRITE | X32_BIT);
+
+ run("Checking some x32 syscalls as x32\n");
+ check_zero(msb, X32_READV | X32_BIT);
+ check_zero(msb, X32_WRITEV | X32_BIT);
+
+ run("Checking some 64-bit syscalls as x32\n");
+ check_enosys(msb, X64_IOCTL | X32_BIT);
+ check_enosys(msb, X64_READV | X32_BIT);
+ check_enosys(msb, X64_WRITEV | X32_BIT);
}
-int main()
+static void test_syscalls_without_x32(int msb)
{
+ run("Checking for absence of x32 system calls\n");
+ check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS);
+}
+
+static void test_syscall_numbering(void)
+{
+ static const int msbs[] = {
+ 0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
+ INT_MIN, INT_MIN+1
+ };
+
+ sh->indent++;
+
/*
- * Anyone diagnosing a failure will want to know whether the kernel
- * supports x32. Tell them.
+ * The MSB is supposed to be ignored, so we loop over a few
+ * to test that out.
*/
- printf("\tChecking for x32...");
- fflush(stdout);
- if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) {
- printf(" supported\n");
- } else if (errno == ENOSYS) {
- printf(" not supported\n");
+ for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
+ int msb = msbs[i];
+ run("Checking system calls with msb = %d (0x%x)\n",
+ msb, msb);
+
+ sh->indent++;
+
+ test_syscalls_common(msb);
+ if (with_x32)
+ test_syscalls_with_x32(msb);
+ else
+ test_syscalls_without_x32(msb);
+
+ sh->indent--;
+ }
+
+ sh->indent--;
+}
+
+static void syscall_numbering_tracee(void)
+{
+ enum ptrace_pass pass;
+
+ if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
+ crit("Failed to request tracing\n");
+ return;
+ }
+ raise(SIGSTOP);
+
+ for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE;
+ sh->ptrace_pass = ++pass) {
+ run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]);
+ test_syscall_numbering();
+ }
+}
+
+static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass)
+{
+ struct user_regs_struct regs;
+
+ sh->probing_syscall = false; /* Do this on entry only */
+
+ /* For these, don't even getregs */
+ if (pass == PTP_NOTHING || pass == PTP_DONE)
+ return;
+
+ ptrace(PTRACE_GETREGS, testpid, NULL, &regs);
+
+ if (regs.orig_rax != regs.rbx) {
+ fail("orig_rax %#llx doesn't match syscall number %#llx\n",
+ (unsigned long long)regs.orig_rax,
+ (unsigned long long)regs.rbx);
+ }
+
+ switch (pass) {
+ case PTP_GETREGS:
+ /* Just read, no writeback */
+ return;
+ case PTP_WRITEBACK:
+ /* Write back the same register state verbatim */
+ break;
+ case PTP_FUZZRET:
+ regs.rax = MODIFIED_BY_PTRACE;
+ break;
+ case PTP_FUZZHIGH:
+ regs.rax = MODIFIED_BY_PTRACE;
+ regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL;
+ break;
+ case PTP_INTNUM:
+ regs.rax = MODIFIED_BY_PTRACE;
+ regs.orig_rax = (int)regs.orig_rax;
+ break;
+ default:
+ crit("invalid ptrace_pass\n");
+ break;
+ }
+
+ ptrace(PTRACE_SETREGS, testpid, NULL, &regs);
+}
+
+static void syscall_numbering_tracer(pid_t testpid)
+{
+ int wstatus;
+
+ do {
+ pid_t wpid = waitpid(testpid, &wstatus, 0);
+ if (wpid < 0 && errno != EINTR)
+ break;
+ if (wpid != testpid)
+ continue;
+ if (!WIFSTOPPED(wstatus))
+ break; /* Thread exited? */
+
+ if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP)
+ mess_with_syscall(testpid, sh->ptrace_pass);
+ } while (sh->ptrace_pass != PTP_DONE &&
+ !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL));
+
+ ptrace(PTRACE_DETACH, testpid, NULL, NULL);
+
+ /* Wait for the child process to terminate */
+ while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus))
+ /* wait some more */;
+}
+
+static void test_traced_syscall_numbering(void)
+{
+ pid_t testpid;
+
+ /* Launch the test thread; this thread continues as the tracer thread */
+ testpid = fork();
+
+ if (testpid < 0) {
+ crit("Unable to launch tracer process\n");
+ } else if (testpid == 0) {
+ syscall_numbering_tracee();
+ _exit(0);
} else {
- printf(" confused\n");
+ syscall_numbering_tracer(testpid);
}
+}
- test_x32_without_x32_bit();
+int main(void)
+{
+ unsigned int nerr;
- return nerrs ? 1 : 0;
+ /*
+ * It is quite likely to get a segfault on a failure, so make
+ * sure the message gets out by setting stdout to nonbuffered.
+ */
+ setvbuf(stdout, NULL, _IONBF, 0);
+
+ /*
+ * Harmless file descriptor to work on...
+ */
+ nullfd = open("/dev/null", O_RDWR);
+ if (nullfd < 0) {
+ crit("Unable to open /dev/null: %s\n", strerror(errno));
+ }
+
+ /*
+ * Set up a block of shared memory...
+ */
+ sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_SHARED, 0, 0);
+ if (sh == MAP_FAILED) {
+ crit("Unable to allocated shared memory block: %s\n",
+ strerror(errno));
+ }
+
+ with_x32 = test_x32();
+
+ run("Running tests without ptrace...\n");
+ test_syscall_numbering();
+
+ test_traced_syscall_numbering();
+
+ nerr = sh->nerr;
+ if (!nerr) {
+ ok("All system calls succeeded or failed as expected\n");
+ return 0;
+ } else {
+ fail("A total of %u system call%s had incorrect behavior\n",
+ nerr, nerr != 1 ? "s" : "");
+ return 1;
+ }
}
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
deleted file mode 100644
index 35edd61d1663..000000000000
--- a/tools/testing/selftests/x86/test_vdso.c
+++ /dev/null
@@ -1,337 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * ldt_gdt.c - Test cases for LDT and GDT access
- * Copyright (c) 2011-2015 Andrew Lutomirski
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <sys/time.h>
-#include <time.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-#include <dlfcn.h>
-#include <string.h>
-#include <errno.h>
-#include <sched.h>
-#include <stdbool.h>
-#include <limits.h>
-
-#ifndef SYS_getcpu
-# ifdef __x86_64__
-# define SYS_getcpu 309
-# else
-# define SYS_getcpu 318
-# endif
-#endif
-
-/* max length of lines in /proc/self/maps - anything longer is skipped here */
-#define MAPS_LINE_LEN 128
-
-int nerrs = 0;
-
-typedef int (*vgettime_t)(clockid_t, struct timespec *);
-
-vgettime_t vdso_clock_gettime;
-
-typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
-
-vgtod_t vdso_gettimeofday;
-
-typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
-
-getcpu_t vgetcpu;
-getcpu_t vdso_getcpu;
-
-static void *vsyscall_getcpu(void)
-{
-#ifdef __x86_64__
- FILE *maps;
- char line[MAPS_LINE_LEN];
- bool found = false;
-
- maps = fopen("/proc/self/maps", "r");
- if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */
- return NULL;
-
- while (fgets(line, MAPS_LINE_LEN, maps)) {
- char r, x;
- void *start, *end;
- char name[MAPS_LINE_LEN];
-
- /* sscanf() is safe here as strlen(name) >= strlen(line) */
- if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
- &start, &end, &r, &x, name) != 5)
- continue;
-
- if (strcmp(name, "[vsyscall]"))
- continue;
-
- /* assume entries are OK, as we test vDSO here not vsyscall */
- found = true;
- break;
- }
-
- fclose(maps);
-
- if (!found) {
- printf("Warning: failed to find vsyscall getcpu\n");
- return NULL;
- }
- return (void *) (0xffffffffff600800);
-#else
- return NULL;
-#endif
-}
-
-
-static void fill_function_pointers()
-{
- void *vdso = dlopen("linux-vdso.so.1",
- RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
- if (!vdso)
- vdso = dlopen("linux-gate.so.1",
- RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
- if (!vdso) {
- printf("[WARN]\tfailed to find vDSO\n");
- return;
- }
-
- vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
- if (!vdso_getcpu)
- printf("Warning: failed to find getcpu in vDSO\n");
-
- vgetcpu = (getcpu_t) vsyscall_getcpu();
-
- vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
- if (!vdso_clock_gettime)
- printf("Warning: failed to find clock_gettime in vDSO\n");
-
- vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
- if (!vdso_gettimeofday)
- printf("Warning: failed to find gettimeofday in vDSO\n");
-
-}
-
-static long sys_getcpu(unsigned * cpu, unsigned * node,
- void* cache)
-{
- return syscall(__NR_getcpu, cpu, node, cache);
-}
-
-static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
-{
- return syscall(__NR_clock_gettime, id, ts);
-}
-
-static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
-{
- return syscall(__NR_gettimeofday, tv, tz);
-}
-
-static void test_getcpu(void)
-{
- printf("[RUN]\tTesting getcpu...\n");
-
- for (int cpu = 0; ; cpu++) {
- cpu_set_t cpuset;
- CPU_ZERO(&cpuset);
- CPU_SET(cpu, &cpuset);
- if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
- return;
-
- unsigned cpu_sys, cpu_vdso, cpu_vsys,
- node_sys, node_vdso, node_vsys;
- long ret_sys, ret_vdso = 1, ret_vsys = 1;
- unsigned node;
-
- ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
- if (vdso_getcpu)
- ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
- if (vgetcpu)
- ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
-
- if (!ret_sys)
- node = node_sys;
- else if (!ret_vdso)
- node = node_vdso;
- else if (!ret_vsys)
- node = node_vsys;
-
- bool ok = true;
- if (!ret_sys && (cpu_sys != cpu || node_sys != node))
- ok = false;
- if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node))
- ok = false;
- if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node))
- ok = false;
-
- printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu);
- if (!ret_sys)
- printf(" syscall: cpu %u, node %u", cpu_sys, node_sys);
- if (!ret_vdso)
- printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso);
- if (!ret_vsys)
- printf(" vsyscall: cpu %u, node %u", cpu_vsys,
- node_vsys);
- printf("\n");
-
- if (!ok)
- nerrs++;
- }
-}
-
-static bool ts_leq(const struct timespec *a, const struct timespec *b)
-{
- if (a->tv_sec != b->tv_sec)
- return a->tv_sec < b->tv_sec;
- else
- return a->tv_nsec <= b->tv_nsec;
-}
-
-static bool tv_leq(const struct timeval *a, const struct timeval *b)
-{
- if (a->tv_sec != b->tv_sec)
- return a->tv_sec < b->tv_sec;
- else
- return a->tv_usec <= b->tv_usec;
-}
-
-static char const * const clocknames[] = {
- [0] = "CLOCK_REALTIME",
- [1] = "CLOCK_MONOTONIC",
- [2] = "CLOCK_PROCESS_CPUTIME_ID",
- [3] = "CLOCK_THREAD_CPUTIME_ID",
- [4] = "CLOCK_MONOTONIC_RAW",
- [5] = "CLOCK_REALTIME_COARSE",
- [6] = "CLOCK_MONOTONIC_COARSE",
- [7] = "CLOCK_BOOTTIME",
- [8] = "CLOCK_REALTIME_ALARM",
- [9] = "CLOCK_BOOTTIME_ALARM",
- [10] = "CLOCK_SGI_CYCLE",
- [11] = "CLOCK_TAI",
-};
-
-static void test_one_clock_gettime(int clock, const char *name)
-{
- struct timespec start, vdso, end;
- int vdso_ret, end_ret;
-
- printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock);
-
- if (sys_clock_gettime(clock, &start) < 0) {
- if (errno == EINVAL) {
- vdso_ret = vdso_clock_gettime(clock, &vdso);
- if (vdso_ret == -EINVAL) {
- printf("[OK]\tNo such clock.\n");
- } else {
- printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret);
- nerrs++;
- }
- } else {
- printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno);
- }
- return;
- }
-
- vdso_ret = vdso_clock_gettime(clock, &vdso);
- end_ret = sys_clock_gettime(clock, &end);
-
- if (vdso_ret != 0 || end_ret != 0) {
- printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
- vdso_ret, errno);
- nerrs++;
- return;
- }
-
- printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
- (unsigned long long)start.tv_sec, start.tv_nsec,
- (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
- (unsigned long long)end.tv_sec, end.tv_nsec);
-
- if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
- printf("[FAIL]\tTimes are out of sequence\n");
- nerrs++;
- }
-}
-
-static void test_clock_gettime(void)
-{
- for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
- clock++) {
- test_one_clock_gettime(clock, clocknames[clock]);
- }
-
- /* Also test some invalid clock ids */
- test_one_clock_gettime(-1, "invalid");
- test_one_clock_gettime(INT_MIN, "invalid");
- test_one_clock_gettime(INT_MAX, "invalid");
-}
-
-static void test_gettimeofday(void)
-{
- struct timeval start, vdso, end;
- struct timezone sys_tz, vdso_tz;
- int vdso_ret, end_ret;
-
- if (!vdso_gettimeofday)
- return;
-
- printf("[RUN]\tTesting gettimeofday...\n");
-
- if (sys_gettimeofday(&start, &sys_tz) < 0) {
- printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno);
- nerrs++;
- return;
- }
-
- vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz);
- end_ret = sys_gettimeofday(&end, NULL);
-
- if (vdso_ret != 0 || end_ret != 0) {
- printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
- vdso_ret, errno);
- nerrs++;
- return;
- }
-
- printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
- (unsigned long long)start.tv_sec, start.tv_usec,
- (unsigned long long)vdso.tv_sec, vdso.tv_usec,
- (unsigned long long)end.tv_sec, end.tv_usec);
-
- if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
- printf("[FAIL]\tTimes are out of sequence\n");
- nerrs++;
- }
-
- if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest &&
- sys_tz.tz_dsttime == vdso_tz.tz_dsttime) {
- printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n",
- sys_tz.tz_minuteswest, sys_tz.tz_dsttime);
- } else {
- printf("[FAIL]\ttimezones do not match\n");
- nerrs++;
- }
-
- /* And make sure that passing NULL for tz doesn't crash. */
- vdso_gettimeofday(&vdso, NULL);
-}
-
-int main(int argc, char **argv)
-{
- fill_function_pointers();
-
- test_clock_gettime();
- test_gettimeofday();
-
- /*
- * Test getcpu() last so that, if something goes wrong setting affinity,
- * we still run the other tests.
- */
- test_getcpu();
-
- return nerrs ? 1 : 0;
-}
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index a4f4d4cf22c3..5b45e6986aea 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -20,6 +20,8 @@
#include <setjmp.h>
#include <sys/uio.h>
+#include "helpers.h"
+
#ifdef __x86_64__
# define VSYS(x) (x)
#else
@@ -460,6 +462,17 @@ static int test_vsys_x(void)
return 0;
}
+/*
+ * Debuggers expect ptrace() to be able to peek at the vsyscall page.
+ * Use process_vm_readv() as a proxy for ptrace() to test this. We
+ * want it to work in the vsyscall=emulate case and to fail in the
+ * vsyscall=xonly case.
+ *
+ * It's worth noting that this ABI is a bit nutty. write(2) can't
+ * read from the vsyscall page on any kernel version or mode. The
+ * fact that ptrace() ever worked was a nice courtesy of old kernels,
+ * but the code to support it is fairly gross.
+ */
static int test_process_vm_readv(void)
{
#ifdef __x86_64__
@@ -475,17 +488,24 @@ static int test_process_vm_readv(void)
remote.iov_len = 4096;
ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0);
if (ret != 4096) {
- printf("[OK]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno);
- return 0;
+ /*
+ * We expect process_vm_readv() to work if and only if the
+ * vsyscall page is readable.
+ */
+ printf("[%s]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", vsyscall_map_r ? "FAIL" : "OK", ret, errno);
+ return vsyscall_map_r ? 1 : 0;
}
if (vsyscall_map_r) {
- if (!memcmp(buf, (const void *)0xffffffffff600000, 4096)) {
+ if (!memcmp(buf, remote.iov_base, sizeof(buf))) {
printf("[OK]\tIt worked and read correct data\n");
} else {
printf("[FAIL]\tIt worked but returned incorrect data\n");
return 1;
}
+ } else {
+ printf("[FAIL]\tprocess_rm_readv() succeeded, but it should have failed in this configuration\n");
+ return 1;
}
#endif
@@ -493,21 +513,8 @@ static int test_process_vm_readv(void)
}
#ifdef __x86_64__
-#define X86_EFLAGS_TF (1UL << 8)
static volatile sig_atomic_t num_vsyscall_traps;
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
-}
-
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t *)ctx_void;
diff --git a/tools/testing/selftests/x86/thunks.S b/tools/testing/selftests/x86/thunks.S
index 1bb5d62c16a4..a2d47d8344d4 100644
--- a/tools/testing/selftests/x86/thunks.S
+++ b/tools/testing/selftests/x86/thunks.S
@@ -57,3 +57,5 @@ call32_from_64:
ret
.size call32_from_64, .-call32_from_64
+
+.section .note.GNU-stack,"",%progbits
diff --git a/tools/testing/selftests/x86/thunks_32.S b/tools/testing/selftests/x86/thunks_32.S
index a71d92da8f46..f3f56e681e9f 100644
--- a/tools/testing/selftests/x86/thunks_32.S
+++ b/tools/testing/selftests/x86/thunks_32.S
@@ -45,3 +45,5 @@ call64_from_32:
ret
.size call64_from_32, .-call64_from_32
+
+.section .note.GNU-stack,"",%progbits
diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c
index 0075ccd65407..4c311e1af4c7 100644
--- a/tools/testing/selftests/x86/unwind_vdso.c
+++ b/tools/testing/selftests/x86/unwind_vdso.c
@@ -11,6 +11,8 @@
#include <features.h>
#include <stdio.h>
+#include "helpers.h"
+
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16
int main()
@@ -53,27 +55,6 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
err(1, "sigaction");
}
-#ifdef __x86_64__
-# define WIDTH "q"
-#else
-# define WIDTH "l"
-#endif
-
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
-#define X86_EFLAGS_TF (1UL << 8)
-
static volatile sig_atomic_t nerrs;
static unsigned long sysinfo;
static bool got_sysinfo = false;
diff --git a/tools/testing/selftests/x86/vdso_restorer.c b/tools/testing/selftests/x86/vdso_restorer.c
index 29a5c94c4b50..fe99f2434155 100644
--- a/tools/testing/selftests/x86/vdso_restorer.c
+++ b/tools/testing/selftests/x86/vdso_restorer.c
@@ -15,6 +15,7 @@
#include <err.h>
#include <stdio.h>
+#include <dlfcn.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
@@ -46,11 +47,23 @@ int main()
int nerrs = 0;
struct real_sigaction sa;
+ void *vdso = dlopen("linux-vdso.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-gate.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso) {
+ printf("[SKIP]\tFailed to find vDSO. Tests are not expected to work.\n");
+ return 0;
+ }
+
memset(&sa, 0, sizeof(sa));
sa.handler = handler_with_siginfo;
sa.flags = SA_SIGINFO;
sa.restorer = NULL; /* request kernel-provided restorer */
+ printf("[RUN]\tRaise a signal, SA_SIGINFO, sa.restorer == NULL\n");
+
if (syscall(SYS_rt_sigaction, SIGUSR1, &sa, NULL, 8) != 0)
err(1, "raw rt_sigaction syscall");
@@ -63,6 +76,8 @@ int main()
nerrs++;
}
+ printf("[RUN]\tRaise a signal, !SA_SIGINFO, sa.restorer == NULL\n");
+
sa.flags = 0;
sa.handler = handler_without_siginfo;
if (syscall(SYS_sigaction, SIGUSR1, &sa, 0) != 0)