diff options
Diffstat (limited to 'tools/testing/selftests/x86')
25 files changed, 4513 insertions, 537 deletions
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 5f16821c7f63..0b872c0a42d2 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -6,18 +6,20 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean UNAME_M := $(shell uname -m) -CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) -CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) -CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie) +CAN_BUILD_I386 := $(shell ./check_cc.sh "$(CC)" trivial_32bit_program.c -m32) +CAN_BUILD_X86_64 := $(shell ./check_cc.sh "$(CC)" trivial_64bit_program.c) +CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ - test_vdso test_vsyscall mov_ss_trap \ - syscall_arg_fault + test_vsyscall mov_ss_trap \ + syscall_arg_fault fsgsbase_restore sigaltstack +TARGETS_C_BOTHBITS += nx_stack TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer -TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering +TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \ + corrupt_xstate_header amx lam test_shadow_stack # Some selftests require 32bit support enabled also on 64bit systems TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall @@ -33,7 +35,7 @@ BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) -CFLAGS := -O2 -g -std=gnu99 -pthread -Wall +CFLAGS := -O2 -g -std=gnu99 -pthread -Wall $(KHDR_INCLUDES) # call32_from_64 in thunks.S uses absolute addresses. ifeq ($(CAN_BUILD_WITH_NOPIE),1) @@ -70,10 +72,10 @@ all_64: $(BINARIES_64) EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64) -$(BINARIES_32): $(OUTPUT)/%_32: %.c +$(BINARIES_32): $(OUTPUT)/%_32: %.c helpers.h $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm -$(BINARIES_64): $(OUTPUT)/%_64: %.c +$(BINARIES_64): $(OUTPUT)/%_64: %.c helpers.h $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl # x86_64 users should be encouraged to install 32-bit libraries @@ -91,6 +93,10 @@ warn_32bit_failure: echo "If you are using a Fedora-like distribution, try:"; \ echo ""; \ echo " yum install glibc-devel.*i686"; \ + echo ""; \ + echo "If you are using a SUSE-like distribution, try:"; \ + echo ""; \ + echo " zypper install gcc-32bit glibc-devel-static-32bit"; \ exit 0; endif @@ -104,3 +110,6 @@ $(OUTPUT)/test_syscall_vdso_32: thunks_32.S # state. $(OUTPUT)/check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static $(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static + +$(OUTPUT)/nx_stack_32: CFLAGS += -Wl,-z,noexecstack +$(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c new file mode 100644 index 000000000000..d884fd69dd51 --- /dev/null +++ b/tools/testing/selftests/x86/amx.c @@ -0,0 +1,955 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <err.h> +#include <errno.h> +#include <pthread.h> +#include <setjmp.h> +#include <stdio.h> +#include <string.h> +#include <stdbool.h> +#include <unistd.h> +#include <x86intrin.h> + +#include <sys/auxv.h> +#include <sys/mman.h> +#include <sys/shm.h> +#include <sys/ptrace.h> +#include <sys/syscall.h> +#include <sys/wait.h> +#include <sys/uio.h> + +#include "../kselftest.h" /* For __cpuid_count() */ + +#ifndef __x86_64__ +# error This test is 64-bit only +#endif + +#define XSAVE_HDR_OFFSET 512 +#define XSAVE_HDR_SIZE 64 + +struct xsave_buffer { + union { + struct { + char legacy[XSAVE_HDR_OFFSET]; + char header[XSAVE_HDR_SIZE]; + char extended[0]; + }; + char bytes[0]; + }; +}; + +static inline uint64_t xgetbv(uint32_t index) +{ + uint32_t eax, edx; + + asm volatile("xgetbv;" + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((uint64_t)edx << 32); +} + +static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm) +{ + uint32_t rfbm_lo = rfbm; + uint32_t rfbm_hi = rfbm >> 32; + + asm volatile("xsave (%%rdi)" + : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi) + : "memory"); +} + +static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm) +{ + uint32_t rfbm_lo = rfbm; + uint32_t rfbm_hi = rfbm >> 32; + + asm volatile("xrstor (%%rdi)" + : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)); +} + +/* err() exits and will not return */ +#define fatal_error(msg, ...) err(1, "[FAIL]\t" msg, ##__VA_ARGS__) + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + fatal_error("sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + fatal_error("sigaction"); +} + +#define XFEATURE_XTILECFG 17 +#define XFEATURE_XTILEDATA 18 +#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG) +#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA) +#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA) + +#define CPUID_LEAF1_ECX_XSAVE_MASK (1 << 26) +#define CPUID_LEAF1_ECX_OSXSAVE_MASK (1 << 27) +static inline void check_cpuid_xsave(void) +{ + uint32_t eax, ebx, ecx, edx; + + /* + * CPUID.1:ECX.XSAVE[bit 26] enumerates general + * support for the XSAVE feature set, including + * XGETBV. + */ + __cpuid_count(1, 0, eax, ebx, ecx, edx); + if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK)) + fatal_error("cpuid: no CPU xsave support"); + if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK)) + fatal_error("cpuid: no OS xsave support"); +} + +static uint32_t xbuf_size; + +static struct { + uint32_t xbuf_offset; + uint32_t size; +} xtiledata; + +#define CPUID_LEAF_XSTATE 0xd +#define CPUID_SUBLEAF_XSTATE_USER 0x0 +#define TILE_CPUID 0x1d +#define TILE_PALETTE_ID 0x1 + +static void check_cpuid_xtiledata(void) +{ + uint32_t eax, ebx, ecx, edx; + + __cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER, + eax, ebx, ecx, edx); + + /* + * EBX enumerates the size (in bytes) required by the XSAVE + * instruction for an XSAVE area containing all the user state + * components corresponding to bits currently set in XCR0. + * + * Stash that off so it can be used to allocate buffers later. + */ + xbuf_size = ebx; + + __cpuid_count(CPUID_LEAF_XSTATE, XFEATURE_XTILEDATA, + eax, ebx, ecx, edx); + /* + * eax: XTILEDATA state component size + * ebx: XTILEDATA state component offset in user buffer + */ + if (!eax || !ebx) + fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d", + eax, ebx); + + xtiledata.size = eax; + xtiledata.xbuf_offset = ebx; +} + +/* The helpers for managing XSAVE buffer and tile states: */ + +struct xsave_buffer *alloc_xbuf(void) +{ + struct xsave_buffer *xbuf; + + /* XSAVE buffer should be 64B-aligned. */ + xbuf = aligned_alloc(64, xbuf_size); + if (!xbuf) + fatal_error("aligned_alloc()"); + return xbuf; +} + +static inline void clear_xstate_header(struct xsave_buffer *buffer) +{ + memset(&buffer->header, 0, sizeof(buffer->header)); +} + +static inline uint64_t get_xstatebv(struct xsave_buffer *buffer) +{ + /* XSTATE_BV is at the beginning of the header: */ + return *(uint64_t *)&buffer->header; +} + +static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv) +{ + /* XSTATE_BV is at the beginning of the header: */ + *(uint64_t *)(&buffer->header) = bv; +} + +static void set_rand_tiledata(struct xsave_buffer *xbuf) +{ + int *ptr = (int *)&xbuf->bytes[xtiledata.xbuf_offset]; + int data; + int i; + + /* + * Ensure that 'data' is never 0. This ensures that + * the registers are never in their initial configuration + * and thus never tracked as being in the init state. + */ + data = rand() | 1; + + for (i = 0; i < xtiledata.size / sizeof(int); i++, ptr++) + *ptr = data; +} + +struct xsave_buffer *stashed_xsave; + +static void init_stashed_xsave(void) +{ + stashed_xsave = alloc_xbuf(); + if (!stashed_xsave) + fatal_error("failed to allocate stashed_xsave\n"); + clear_xstate_header(stashed_xsave); +} + +static void free_stashed_xsave(void) +{ + free(stashed_xsave); +} + +/* See 'struct _fpx_sw_bytes' at sigcontext.h */ +#define SW_BYTES_OFFSET 464 +/* N.B. The struct's field name varies so read from the offset. */ +#define SW_BYTES_BV_OFFSET (SW_BYTES_OFFSET + 8) + +static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *buffer) +{ + return (struct _fpx_sw_bytes *)(buffer + SW_BYTES_OFFSET); +} + +static inline uint64_t get_fpx_sw_bytes_features(void *buffer) +{ + return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET); +} + +/* Work around printf() being unsafe in signals: */ +#define SIGNAL_BUF_LEN 1000 +char signal_message_buffer[SIGNAL_BUF_LEN]; +void sig_print(char *msg) +{ + int left = SIGNAL_BUF_LEN - strlen(signal_message_buffer) - 1; + + strncat(signal_message_buffer, msg, left); +} + +static volatile bool noperm_signaled; +static int noperm_errs; +/* + * Signal handler for when AMX is used but + * permission has not been obtained. + */ +static void handle_noperm(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t *)ctx_void; + void *xbuf = ctx->uc_mcontext.fpregs; + struct _fpx_sw_bytes *sw_bytes; + uint64_t features; + + /* Reset the signal message buffer: */ + signal_message_buffer[0] = '\0'; + sig_print("\tAt SIGILL handler,\n"); + + if (si->si_code != ILL_ILLOPC) { + noperm_errs++; + sig_print("[FAIL]\tInvalid signal code.\n"); + } else { + sig_print("[OK]\tValid signal code (ILL_ILLOPC).\n"); + } + + sw_bytes = get_fpx_sw_bytes(xbuf); + /* + * Without permission, the signal XSAVE buffer should not + * have room for AMX register state (aka. xtiledata). + * Check that the size does not overlap with where xtiledata + * will reside. + * + * This also implies that no state components *PAST* + * XTILEDATA (features >=19) can be present in the buffer. + */ + if (sw_bytes->xstate_size <= xtiledata.xbuf_offset) { + sig_print("[OK]\tValid xstate size\n"); + } else { + noperm_errs++; + sig_print("[FAIL]\tInvalid xstate size\n"); + } + + features = get_fpx_sw_bytes_features(xbuf); + /* + * Without permission, the XTILEDATA feature + * bit should not be set. + */ + if ((features & XFEATURE_MASK_XTILEDATA) == 0) { + sig_print("[OK]\tValid xstate mask\n"); + } else { + noperm_errs++; + sig_print("[FAIL]\tInvalid xstate mask\n"); + } + + noperm_signaled = true; + ctx->uc_mcontext.gregs[REG_RIP] += 3; /* Skip the faulting XRSTOR */ +} + +/* Return true if XRSTOR is successful; otherwise, false. */ +static inline bool xrstor_safe(struct xsave_buffer *xbuf, uint64_t mask) +{ + noperm_signaled = false; + xrstor(xbuf, mask); + + /* Print any messages produced by the signal code: */ + printf("%s", signal_message_buffer); + /* + * Reset the buffer to make sure any future printing + * only outputs new messages: + */ + signal_message_buffer[0] = '\0'; + + if (noperm_errs) + fatal_error("saw %d errors in noperm signal handler\n", noperm_errs); + + return !noperm_signaled; +} + +/* + * Use XRSTOR to populate the XTILEDATA registers with + * random data. + * + * Return true if successful; otherwise, false. + */ +static inline bool load_rand_tiledata(struct xsave_buffer *xbuf) +{ + clear_xstate_header(xbuf); + set_xstatebv(xbuf, XFEATURE_MASK_XTILEDATA); + set_rand_tiledata(xbuf); + return xrstor_safe(xbuf, XFEATURE_MASK_XTILEDATA); +} + +/* Return XTILEDATA to its initial configuration. */ +static inline void init_xtiledata(void) +{ + clear_xstate_header(stashed_xsave); + xrstor_safe(stashed_xsave, XFEATURE_MASK_XTILEDATA); +} + +enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED }; + +/* arch_prctl() and sigaltstack() test */ + +#define ARCH_GET_XCOMP_PERM 0x1022 +#define ARCH_REQ_XCOMP_PERM 0x1023 + +static void req_xtiledata_perm(void) +{ + syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA); +} + +static void validate_req_xcomp_perm(enum expected_result exp) +{ + unsigned long bitmask, expected_bitmask; + long rc; + + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask); + if (rc) { + fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc); + } else if (!(bitmask & XFEATURE_MASK_XTILECFG)) { + fatal_error("ARCH_GET_XCOMP_PERM returns XFEATURE_XTILECFG off."); + } + + rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA); + if (exp == FAIL_EXPECTED) { + if (rc) { + printf("[OK]\tARCH_REQ_XCOMP_PERM saw expected failure..\n"); + return; + } + + fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected success.\n"); + } else if (rc) { + fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected failure.\n"); + } + + expected_bitmask = bitmask | XFEATURE_MASK_XTILEDATA; + + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask); + if (rc) { + fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc); + } else if (bitmask != expected_bitmask) { + fatal_error("ARCH_REQ_XCOMP_PERM set a wrong bitmask: %lx, expected: %lx.\n", + bitmask, expected_bitmask); + } else { + printf("\tARCH_REQ_XCOMP_PERM is successful.\n"); + } +} + +static void validate_xcomp_perm(enum expected_result exp) +{ + bool load_success = load_rand_tiledata(stashed_xsave); + + if (exp == FAIL_EXPECTED) { + if (load_success) { + noperm_errs++; + printf("[FAIL]\tLoad tiledata succeeded.\n"); + } else { + printf("[OK]\tLoad tiledata failed.\n"); + } + } else if (exp == SUCCESS_EXPECTED) { + if (load_success) { + printf("[OK]\tLoad tiledata succeeded.\n"); + } else { + noperm_errs++; + printf("[FAIL]\tLoad tiledata failed.\n"); + } + } +} + +#ifndef AT_MINSIGSTKSZ +# define AT_MINSIGSTKSZ 51 +#endif + +static void *alloc_altstack(unsigned int size) +{ + void *altstack; + + altstack = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + + if (altstack == MAP_FAILED) + fatal_error("mmap() for altstack"); + + return altstack; +} + +static void setup_altstack(void *addr, unsigned long size, enum expected_result exp) +{ + stack_t ss; + int rc; + + memset(&ss, 0, sizeof(ss)); + ss.ss_size = size; + ss.ss_sp = addr; + + rc = sigaltstack(&ss, NULL); + + if (exp == FAIL_EXPECTED) { + if (rc) { + printf("[OK]\tsigaltstack() failed.\n"); + } else { + fatal_error("sigaltstack() succeeded unexpectedly.\n"); + } + } else if (rc) { + fatal_error("sigaltstack()"); + } +} + +static void test_dynamic_sigaltstack(void) +{ + unsigned int small_size, enough_size; + unsigned long minsigstksz; + void *altstack; + + minsigstksz = getauxval(AT_MINSIGSTKSZ); + printf("\tAT_MINSIGSTKSZ = %lu\n", minsigstksz); + /* + * getauxval() itself can return 0 for failure or + * success. But, in this case, AT_MINSIGSTKSZ + * will always return a >=0 value if implemented. + * Just check for 0. + */ + if (minsigstksz == 0) { + printf("no support for AT_MINSIGSTKSZ, skipping sigaltstack tests\n"); + return; + } + + enough_size = minsigstksz * 2; + + altstack = alloc_altstack(enough_size); + printf("\tAllocate memory for altstack (%u bytes).\n", enough_size); + + /* + * Try setup_altstack() with a size which can not fit + * XTILEDATA. ARCH_REQ_XCOMP_PERM should fail. + */ + small_size = minsigstksz - xtiledata.size; + printf("\tAfter sigaltstack() with small size (%u bytes).\n", small_size); + setup_altstack(altstack, small_size, SUCCESS_EXPECTED); + validate_req_xcomp_perm(FAIL_EXPECTED); + + /* + * Try setup_altstack() with a size derived from + * AT_MINSIGSTKSZ. It should be more than large enough + * and thus ARCH_REQ_XCOMP_PERM should succeed. + */ + printf("\tAfter sigaltstack() with enough size (%u bytes).\n", enough_size); + setup_altstack(altstack, enough_size, SUCCESS_EXPECTED); + validate_req_xcomp_perm(SUCCESS_EXPECTED); + + /* + * Try to coerce setup_altstack() to again accept a + * too-small altstack. This ensures that big-enough + * sigaltstacks can not shrink to a too-small value + * once XTILEDATA permission is established. + */ + printf("\tThen, sigaltstack() with small size (%u bytes).\n", small_size); + setup_altstack(altstack, small_size, FAIL_EXPECTED); +} + +static void test_dynamic_state(void) +{ + pid_t parent, child, grandchild; + + parent = fork(); + if (parent < 0) { + /* fork() failed */ + fatal_error("fork"); + } else if (parent > 0) { + int status; + /* fork() succeeded. Now in the parent. */ + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + fatal_error("arch_prctl test parent exit"); + return; + } + /* fork() succeeded. Now in the child . */ + + printf("[RUN]\tCheck ARCH_REQ_XCOMP_PERM around process fork() and sigaltack() test.\n"); + + printf("\tFork a child.\n"); + child = fork(); + if (child < 0) { + fatal_error("fork"); + } else if (child > 0) { + int status; + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + fatal_error("arch_prctl test child exit"); + _exit(0); + } + + /* + * The permission request should fail without an + * XTILEDATA-compatible signal stack + */ + printf("\tTest XCOMP_PERM at child.\n"); + validate_xcomp_perm(FAIL_EXPECTED); + + /* + * Set up an XTILEDATA-compatible signal stack and + * also obtain permission to populate XTILEDATA. + */ + printf("\tTest dynamic sigaltstack at child:\n"); + test_dynamic_sigaltstack(); + + /* Ensure that XTILEDATA can be populated. */ + printf("\tTest XCOMP_PERM again at child.\n"); + validate_xcomp_perm(SUCCESS_EXPECTED); + + printf("\tFork a grandchild.\n"); + grandchild = fork(); + if (grandchild < 0) { + /* fork() failed */ + fatal_error("fork"); + } else if (!grandchild) { + /* fork() succeeded. Now in the (grand)child. */ + printf("\tTest XCOMP_PERM at grandchild.\n"); + + /* + * Ensure that the grandchild inherited + * permission and a compatible sigaltstack: + */ + validate_xcomp_perm(SUCCESS_EXPECTED); + } else { + int status; + /* fork() succeeded. Now in the parent. */ + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + fatal_error("fork test grandchild"); + } + + _exit(0); +} + +static inline int __compare_tiledata_state(struct xsave_buffer *xbuf1, struct xsave_buffer *xbuf2) +{ + return memcmp(&xbuf1->bytes[xtiledata.xbuf_offset], + &xbuf2->bytes[xtiledata.xbuf_offset], + xtiledata.size); +} + +/* + * Save current register state and compare it to @xbuf1.' + * + * Returns false if @xbuf1 matches the registers. + * Returns true if @xbuf1 differs from the registers. + */ +static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1) +{ + struct xsave_buffer *xbuf2; + int ret; + + xbuf2 = alloc_xbuf(); + if (!xbuf2) + fatal_error("failed to allocate XSAVE buffer\n"); + + xsave(xbuf2, XFEATURE_MASK_XTILEDATA); + ret = __compare_tiledata_state(xbuf1, xbuf2); + + free(xbuf2); + + if (ret == 0) + return false; + return true; +} + +static inline void validate_tiledata_regs_same(struct xsave_buffer *xbuf) +{ + int ret = __validate_tiledata_regs(xbuf); + + if (ret != 0) + fatal_error("TILEDATA registers changed"); +} + +static inline void validate_tiledata_regs_changed(struct xsave_buffer *xbuf) +{ + int ret = __validate_tiledata_regs(xbuf); + + if (ret == 0) + fatal_error("TILEDATA registers did not change"); +} + +/* tiledata inheritance test */ + +static void test_fork(void) +{ + pid_t child, grandchild; + + child = fork(); + if (child < 0) { + /* fork() failed */ + fatal_error("fork"); + } else if (child > 0) { + /* fork() succeeded. Now in the parent. */ + int status; + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + fatal_error("fork test child"); + return; + } + /* fork() succeeded. Now in the child. */ + printf("[RUN]\tCheck tile data inheritance.\n\tBefore fork(), load tiledata\n"); + + load_rand_tiledata(stashed_xsave); + + grandchild = fork(); + if (grandchild < 0) { + /* fork() failed */ + fatal_error("fork"); + } else if (grandchild > 0) { + /* fork() succeeded. Still in the first child. */ + int status; + + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + fatal_error("fork test grand child"); + _exit(0); + } + /* fork() succeeded. Now in the (grand)child. */ + + /* + * TILEDATA registers are not preserved across fork(). + * Ensure that their value has changed: + */ + validate_tiledata_regs_changed(stashed_xsave); + + _exit(0); +} + +/* Context switching test */ + +static struct _ctxtswtest_cfg { + unsigned int iterations; + unsigned int num_threads; +} ctxtswtest_config; + +struct futex_info { + pthread_t thread; + int nr; + pthread_mutex_t mutex; + struct futex_info *next; +}; + +static void *check_tiledata(void *info) +{ + struct futex_info *finfo = (struct futex_info *)info; + struct xsave_buffer *xbuf; + int i; + + xbuf = alloc_xbuf(); + if (!xbuf) + fatal_error("unable to allocate XSAVE buffer"); + + /* + * Load random data into 'xbuf' and then restore + * it to the tile registers themselves. + */ + load_rand_tiledata(xbuf); + for (i = 0; i < ctxtswtest_config.iterations; i++) { + pthread_mutex_lock(&finfo->mutex); + + /* + * Ensure the register values have not + * diverged from those recorded in 'xbuf'. + */ + validate_tiledata_regs_same(xbuf); + + /* Load new, random values into xbuf and registers */ + load_rand_tiledata(xbuf); + + /* + * The last thread's last unlock will be for + * thread 0's mutex. However, thread 0 will + * have already exited the loop and the mutex + * will already be unlocked. + * + * Because this is not an ERRORCHECK mutex, + * that inconsistency will be silently ignored. + */ + pthread_mutex_unlock(&finfo->next->mutex); + } + + free(xbuf); + /* + * Return this thread's finfo, which is + * a unique value for this thread. + */ + return finfo; +} + +static int create_threads(int num, struct futex_info *finfo) +{ + int i; + + for (i = 0; i < num; i++) { + int next_nr; + + finfo[i].nr = i; + /* + * Thread 'i' will wait on this mutex to + * be unlocked. Lock it immediately after + * initialization: + */ + pthread_mutex_init(&finfo[i].mutex, NULL); + pthread_mutex_lock(&finfo[i].mutex); + + next_nr = (i + 1) % num; + finfo[i].next = &finfo[next_nr]; + + if (pthread_create(&finfo[i].thread, NULL, check_tiledata, &finfo[i])) + fatal_error("pthread_create()"); + } + return 0; +} + +static void affinitize_cpu0(void) +{ + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + fatal_error("sched_setaffinity to CPU 0"); +} + +static void test_context_switch(void) +{ + struct futex_info *finfo; + int i; + + /* Affinitize to one CPU to force context switches */ + affinitize_cpu0(); + + req_xtiledata_perm(); + + printf("[RUN]\tCheck tiledata context switches, %d iterations, %d threads.\n", + ctxtswtest_config.iterations, + ctxtswtest_config.num_threads); + + + finfo = malloc(sizeof(*finfo) * ctxtswtest_config.num_threads); + if (!finfo) + fatal_error("malloc()"); + + create_threads(ctxtswtest_config.num_threads, finfo); + + /* + * This thread wakes up thread 0 + * Thread 0 will wake up 1 + * Thread 1 will wake up 2 + * ... + * the last thread will wake up 0 + * + * ... this will repeat for the configured + * number of iterations. + */ + pthread_mutex_unlock(&finfo[0].mutex); + + /* Wait for all the threads to finish: */ + for (i = 0; i < ctxtswtest_config.num_threads; i++) { + void *thread_retval; + int rc; + + rc = pthread_join(finfo[i].thread, &thread_retval); + + if (rc) + fatal_error("pthread_join() failed for thread %d err: %d\n", + i, rc); + + if (thread_retval != &finfo[i]) + fatal_error("unexpected thread retval for thread %d: %p\n", + i, thread_retval); + + } + + printf("[OK]\tNo incorrect case was found.\n"); + + free(finfo); +} + +/* Ptrace test */ + +/* + * Make sure the ptracee has the expanded kernel buffer on the first + * use. Then, initialize the state before performing the state + * injection from the ptracer. + */ +static inline void ptracee_firstuse_tiledata(void) +{ + load_rand_tiledata(stashed_xsave); + init_xtiledata(); +} + +/* + * Ptracer injects the randomized tile data state. It also reads + * before and after that, which will execute the kernel's state copy + * functions. So, the tester is advised to double-check any emitted + * kernel messages. + */ +static void ptracer_inject_tiledata(pid_t target) +{ + struct xsave_buffer *xbuf; + struct iovec iov; + + xbuf = alloc_xbuf(); + if (!xbuf) + fatal_error("unable to allocate XSAVE buffer"); + + printf("\tRead the init'ed tiledata via ptrace().\n"); + + iov.iov_base = xbuf; + iov.iov_len = xbuf_size; + + memset(stashed_xsave, 0, xbuf_size); + + if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) + fatal_error("PTRACE_GETREGSET"); + + if (!__compare_tiledata_state(stashed_xsave, xbuf)) + printf("[OK]\tThe init'ed tiledata was read from ptracee.\n"); + else + printf("[FAIL]\tThe init'ed tiledata was not read from ptracee.\n"); + + printf("\tInject tiledata via ptrace().\n"); + + load_rand_tiledata(xbuf); + + memcpy(&stashed_xsave->bytes[xtiledata.xbuf_offset], + &xbuf->bytes[xtiledata.xbuf_offset], + xtiledata.size); + + if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) + fatal_error("PTRACE_SETREGSET"); + + if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov)) + fatal_error("PTRACE_GETREGSET"); + + if (!__compare_tiledata_state(stashed_xsave, xbuf)) + printf("[OK]\tTiledata was correctly written to ptracee.\n"); + else + printf("[FAIL]\tTiledata was not correctly written to ptracee.\n"); +} + +static void test_ptrace(void) +{ + pid_t child; + int status; + + child = fork(); + if (child < 0) { + err(1, "fork"); + } else if (!child) { + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL)) + err(1, "PTRACE_TRACEME"); + + ptracee_firstuse_tiledata(); + + raise(SIGTRAP); + _exit(0); + } + + do { + wait(&status); + } while (WSTOPSIG(status) != SIGTRAP); + + ptracer_inject_tiledata(child); + + ptrace(PTRACE_DETACH, child, NULL, NULL); + wait(&status); + if (!WIFEXITED(status) || WEXITSTATUS(status)) + err(1, "ptrace test"); +} + +int main(void) +{ + /* Check hardware availability at first */ + check_cpuid_xsave(); + check_cpuid_xtiledata(); + + init_stashed_xsave(); + sethandler(SIGILL, handle_noperm, 0); + + test_dynamic_state(); + + /* Request permission for the following tests */ + req_xtiledata_perm(); + + test_fork(); + + ctxtswtest_config.iterations = 10; + ctxtswtest_config.num_threads = 5; + test_context_switch(); + + test_ptrace(); + + clearhandler(SIGILL); + free_stashed_xsave(); + + return 0; +} diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh index 3e2089c8cf54..8c669c0d662e 100755 --- a/tools/testing/selftests/x86/check_cc.sh +++ b/tools/testing/selftests/x86/check_cc.sh @@ -7,7 +7,7 @@ CC="$1" TESTPROG="$2" shift 2 -if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then +if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then echo 1 else echo 0 diff --git a/tools/testing/selftests/x86/corrupt_xstate_header.c b/tools/testing/selftests/x86/corrupt_xstate_header.c new file mode 100644 index 000000000000..cf9ce8fbb656 --- /dev/null +++ b/tools/testing/selftests/x86/corrupt_xstate_header.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Corrupt the XSTATE header in a signal frame + * + * Based on analysis and a test case from Thomas Gleixner. + */ + +#define _GNU_SOURCE + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <sched.h> +#include <signal.h> +#include <err.h> +#include <unistd.h> +#include <stdint.h> +#include <sys/wait.h> + +#include "../kselftest.h" /* For __cpuid_count() */ + +static inline int xsave_enabled(void) +{ + unsigned int eax, ebx, ecx, edx; + + __cpuid_count(0x1, 0x0, eax, ebx, ecx, edx); + + /* Is CR4.OSXSAVE enabled ? */ + return ecx & (1U << 27); +} + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void sigusr1(int sig, siginfo_t *info, void *uc_void) +{ + ucontext_t *uc = uc_void; + uint8_t *fpstate = (uint8_t *)uc->uc_mcontext.fpregs; + uint64_t *xfeatures = (uint64_t *)(fpstate + 512); + + printf("\tWreck XSTATE header\n"); + /* Wreck the first reserved bytes in the header */ + *(xfeatures + 2) = 0xfffffff; +} + +static void sigsegv(int sig, siginfo_t *info, void *uc_void) +{ + printf("\tGot SIGSEGV\n"); +} + +int main(void) +{ + cpu_set_t set; + + sethandler(SIGUSR1, sigusr1, 0); + sethandler(SIGSEGV, sigsegv, 0); + + if (!xsave_enabled()) { + printf("[SKIP] CR4.OSXSAVE disabled.\n"); + return 0; + } + + CPU_ZERO(&set); + CPU_SET(0, &set); + + /* + * Enforce that the child runs on the same CPU + * which in turn forces a schedule. + */ + sched_setaffinity(getpid(), sizeof(set), &set); + + printf("[RUN]\tSend ourselves a signal\n"); + raise(SIGUSR1); + + printf("[OK]\tBack from the signal. Now schedule.\n"); + pid_t child = fork(); + if (child < 0) + err(1, "fork"); + if (child == 0) + return 0; + if (child) + waitpid(child, NULL, 0); + printf("[OK]\tBack in the main thread.\n"); + + /* + * We could try to confirm that extended state is still preserved + * when we schedule. For now, the only indication of failure is + * a warning in the kernel logs. + */ + + return 0; +} diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 15a329da59fa..8c780cce941d 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -285,7 +285,8 @@ static unsigned short load_gs(void) /* 32-bit set_thread_area */ long ret; asm volatile ("int $0x80" - : "=a" (ret) : "a" (243), "b" (low_desc) + : "=a" (ret), "+m" (*low_desc) + : "a" (243), "b" (low_desc) : "r8", "r9", "r10", "r11"); memcpy(&desc, low_desc, sizeof(desc)); munmap(low_desc, sizeof(desc)); @@ -391,8 +392,8 @@ static void set_gs_and_switch_to(unsigned long local, local = read_base(GS); /* - * Signal delivery seems to mess up weird selectors. Put it - * back. + * Signal delivery is quite likely to change a selector + * of 1, 2, or 3 back to 0 due to IRET being defective. */ asm volatile ("mov %0, %%gs" : : "rm" (force_sel)); } else { @@ -410,6 +411,14 @@ static void set_gs_and_switch_to(unsigned long local, if (base == local && sel_pre_sched == sel_post_sched) { printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n", sel_pre_sched, local); + } else if (base == local && sel_pre_sched >= 1 && sel_pre_sched <= 3 && + sel_post_sched == 0) { + /* + * IRET is misdesigned and will squash selectors 1, 2, or 3 + * to zero. Don't fail the test just because this happened. + */ + printf("[OK]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx because IRET is defective\n", + sel_pre_sched, local, sel_post_sched, base); } else { nerrs++; printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n", @@ -442,6 +451,68 @@ static void test_unexpected_base(void) #define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r) +static void test_ptrace_write_gs_read_base(void) +{ + int status; + pid_t child = fork(); + + if (child < 0) + err(1, "fork"); + + if (child == 0) { + printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n"); + + printf("[RUN]\tARCH_SET_GS to 1\n"); + if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0) + err(1, "ARCH_SET_GS"); + + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) + err(1, "PTRACE_TRACEME"); + + raise(SIGTRAP); + _exit(0); + } + + wait(&status); + + if (WSTOPSIG(status) == SIGTRAP) { + unsigned long base; + unsigned long gs_offset = USER_REGS_OFFSET(gs); + unsigned long base_offset = USER_REGS_OFFSET(gs_base); + + /* Read the initial base. It should be 1. */ + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); + if (base == 1) { + printf("[OK]\tGSBASE started at 1\n"); + } else { + nerrs++; + printf("[FAIL]\tGSBASE started at 0x%lx\n", base); + } + + printf("[RUN]\tSet GS = 0x7, read GSBASE\n"); + + /* Poke an LDT selector into GS. */ + if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0) + err(1, "PTRACE_POKEUSER"); + + /* And read the base. */ + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); + + if (base == 0 || base == 1) { + printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base); + } else { + nerrs++; + printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base); + } + } + + ptrace(PTRACE_CONT, child, NULL, NULL); + + wait(&status); + if (!WIFEXITED(status)) + printf("[WARN]\tChild didn't exit cleanly.\n"); +} + static void test_ptrace_write_gsbase(void) { int status; @@ -489,16 +560,36 @@ static void test_ptrace_write_gsbase(void) * selector value is changed or not by the GSBASE write in * a ptracer. */ - if (gs == 0 && base == 0xFF) { - printf("[OK]\tGS was reset as expected\n"); - } else { + if (gs != *shared_scratch) { nerrs++; - printf("[FAIL]\tGS=0x%lx, GSBASE=0x%lx (should be 0, 0xFF)\n", gs, base); + printf("[FAIL]\tGS changed to %lx\n", gs); + + /* + * On older kernels, poking a nonzero value into the + * base would zero the selector. On newer kernels, + * this behavior has changed -- poking the base + * changes only the base and, if FSGSBASE is not + * available, this may have no effect once the tracee + * is resumed. + */ + if (gs == 0) + printf("\tNote: this is expected behavior on older kernels.\n"); + } else if (have_fsgsbase && (base != 0xFF)) { + nerrs++; + printf("[FAIL]\tGSBASE changed to %lx\n", base); + } else { + printf("[OK]\tGS remained 0x%hx", *shared_scratch); + if (have_fsgsbase) + printf(" and GSBASE changed to 0xFF"); + printf("\n"); } } END: ptrace(PTRACE_CONT, child, NULL, NULL); + wait(&status); + if (!WIFEXITED(status)) + printf("[WARN]\tChild didn't exit cleanly.\n"); } int main() @@ -508,6 +599,9 @@ int main() shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); + /* Do these tests before we have an LDT. */ + test_ptrace_write_gs_read_base(); + /* Probe FSGSBASE */ sethandler(SIGILL, sigill, 0); if (sigsetjmp(jmpbuf, 1) == 0) { diff --git a/tools/testing/selftests/x86/fsgsbase_restore.c b/tools/testing/selftests/x86/fsgsbase_restore.c new file mode 100644 index 000000000000..6fffadc51579 --- /dev/null +++ b/tools/testing/selftests/x86/fsgsbase_restore.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * fsgsbase_restore.c, test ptrace vs fsgsbase + * Copyright (c) 2020 Andy Lutomirski + * + * This test case simulates a tracer redirecting tracee execution to + * a function and then restoring tracee state using PTRACE_GETREGS and + * PTRACE_SETREGS. This is similar to what gdb does when doing + * 'p func()'. The catch is that this test has the called function + * modify a segment register. This makes sure that ptrace correctly + * restores segment state when using PTRACE_SETREGS. + * + * This is not part of fsgsbase.c, because that test is 64-bit only. + */ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <sys/syscall.h> +#include <unistd.h> +#include <err.h> +#include <sys/user.h> +#include <asm/prctl.h> +#include <sys/prctl.h> +#include <asm/ldt.h> +#include <sys/mman.h> +#include <stddef.h> +#include <sys/ptrace.h> +#include <sys/wait.h> +#include <stdint.h> + +#define EXPECTED_VALUE 0x1337f00d + +#ifdef __x86_64__ +# define SEG "%gs" +#else +# define SEG "%fs" +#endif + +static unsigned int dereference_seg_base(void) +{ + int ret; + asm volatile ("mov %" SEG ":(0), %0" : "=rm" (ret)); + return ret; +} + +static void init_seg(void) +{ + unsigned int *target = mmap( + NULL, sizeof(unsigned int), + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0); + if (target == MAP_FAILED) + err(1, "mmap"); + + *target = EXPECTED_VALUE; + + printf("\tsegment base address = 0x%lx\n", (unsigned long)target); + + struct user_desc desc = { + .entry_number = 0, + .base_addr = (unsigned int)(uintptr_t)target, + .limit = sizeof(unsigned int) - 1, + .seg_32bit = 1, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) { + printf("\tusing LDT slot 0\n"); + asm volatile ("mov %0, %" SEG :: "rm" ((unsigned short)0x7)); + } else { + /* No modify_ldt for us (configured out, perhaps) */ + + struct user_desc *low_desc = mmap( + NULL, sizeof(desc), + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0); + memcpy(low_desc, &desc, sizeof(desc)); + + low_desc->entry_number = -1; + + /* 32-bit set_thread_area */ + long ret; + asm volatile ("int $0x80" + : "=a" (ret), "+m" (*low_desc) + : "a" (243), "b" (low_desc) +#ifdef __x86_64__ + : "r8", "r9", "r10", "r11" +#endif + ); + memcpy(&desc, low_desc, sizeof(desc)); + munmap(low_desc, sizeof(desc)); + + if (ret != 0) { + printf("[NOTE]\tcould not create a segment -- can't test anything\n"); + exit(0); + } + printf("\tusing GDT slot %d\n", desc.entry_number); + + unsigned short sel = (unsigned short)((desc.entry_number << 3) | 0x3); + asm volatile ("mov %0, %" SEG :: "rm" (sel)); + } +} + +static void tracee_zap_segment(void) +{ + /* + * The tracer will redirect execution here. This is meant to + * work like gdb's 'p func()' feature. The tricky bit is that + * we modify a segment register in order to make sure that ptrace + * can correctly restore segment registers. + */ + printf("\tTracee: in tracee_zap_segment()\n"); + + /* + * Write a nonzero selector with base zero to the segment register. + * Using a null selector would defeat the test on AMD pre-Zen2 + * CPUs, as such CPUs don't clear the base when loading a null + * selector. + */ + unsigned short sel; + asm volatile ("mov %%ss, %0\n\t" + "mov %0, %" SEG + : "=rm" (sel)); + + pid_t pid = getpid(), tid = syscall(SYS_gettid); + + printf("\tTracee is going back to sleep\n"); + syscall(SYS_tgkill, pid, tid, SIGSTOP); + + /* Should not get here. */ + while (true) { + printf("[FAIL]\tTracee hit unreachable code\n"); + pause(); + } +} + +int main() +{ + printf("\tSetting up a segment\n"); + init_seg(); + + unsigned int val = dereference_seg_base(); + if (val != EXPECTED_VALUE) { + printf("[FAIL]\tseg[0] == %x; should be %x\n", val, EXPECTED_VALUE); + return 1; + } + printf("[OK]\tThe segment points to the right place.\n"); + + pid_t chld = fork(); + if (chld < 0) + err(1, "fork"); + + if (chld == 0) { + prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0, 0); + + if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0) + err(1, "PTRACE_TRACEME"); + + pid_t pid = getpid(), tid = syscall(SYS_gettid); + + printf("\tTracee will take a nap until signaled\n"); + syscall(SYS_tgkill, pid, tid, SIGSTOP); + + printf("\tTracee was resumed. Will re-check segment.\n"); + + val = dereference_seg_base(); + if (val != EXPECTED_VALUE) { + printf("[FAIL]\tseg[0] == %x; should be %x\n", val, EXPECTED_VALUE); + exit(1); + } + + printf("[OK]\tThe segment points to the right place.\n"); + exit(0); + } + + int status; + + /* Wait for SIGSTOP. */ + if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status)) + err(1, "waitpid"); + + struct user_regs_struct regs; + + if (ptrace(PTRACE_GETREGS, chld, NULL, ®s) != 0) + err(1, "PTRACE_GETREGS"); + +#ifdef __x86_64__ + printf("\tChild GS=0x%lx, GSBASE=0x%lx\n", (unsigned long)regs.gs, (unsigned long)regs.gs_base); +#else + printf("\tChild FS=0x%lx\n", (unsigned long)regs.xfs); +#endif + + struct user_regs_struct regs2 = regs; +#ifdef __x86_64__ + regs2.rip = (unsigned long)tracee_zap_segment; + regs2.rsp -= 128; /* Don't clobber the redzone. */ +#else + regs2.eip = (unsigned long)tracee_zap_segment; +#endif + + printf("\tTracer: redirecting tracee to tracee_zap_segment()\n"); + if (ptrace(PTRACE_SETREGS, chld, NULL, ®s2) != 0) + err(1, "PTRACE_GETREGS"); + if (ptrace(PTRACE_CONT, chld, NULL, NULL) != 0) + err(1, "PTRACE_GETREGS"); + + /* Wait for SIGSTOP. */ + if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status)) + err(1, "waitpid"); + + printf("\tTracer: restoring tracee state\n"); + if (ptrace(PTRACE_SETREGS, chld, NULL, ®s) != 0) + err(1, "PTRACE_GETREGS"); + if (ptrace(PTRACE_DETACH, chld, NULL, NULL) != 0) + err(1, "PTRACE_GETREGS"); + + /* Wait for SIGSTOP. */ + if (waitpid(chld, &status, 0) != chld) + err(1, "waitpid"); + + if (WIFSIGNALED(status)) { + printf("[FAIL]\tTracee crashed\n"); + return 1; + } + + if (!WIFEXITED(status)) { + printf("[FAIL]\tTracee stopped for an unexpected reason: %d\n", status); + return 1; + } + + int exitcode = WEXITSTATUS(status); + if (exitcode != 0) { + printf("[FAIL]\tTracee reported failure\n"); + return 1; + } + + printf("[OK]\tAll is well.\n"); + return 0; +} diff --git a/tools/testing/selftests/x86/helpers.h b/tools/testing/selftests/x86/helpers.h new file mode 100644 index 000000000000..4ef42c4559a9 --- /dev/null +++ b/tools/testing/selftests/x86/helpers.h @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef __SELFTESTS_X86_HELPERS_H +#define __SELFTESTS_X86_HELPERS_H + +#include <asm/processor-flags.h> + +static inline unsigned long get_eflags(void) +{ +#ifdef __x86_64__ + return __builtin_ia32_readeflags_u64(); +#else + return __builtin_ia32_readeflags_u32(); +#endif +} + +static inline void set_eflags(unsigned long eflags) +{ +#ifdef __x86_64__ + __builtin_ia32_writeeflags_u64(eflags); +#else + __builtin_ia32_writeeflags_u32(eflags); +#endif +} + +#endif /* __SELFTESTS_X86_HELPERS_H */ diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c index bab2f6e06b63..7e3e09c1abac 100644 --- a/tools/testing/selftests/x86/iopl.c +++ b/tools/testing/selftests/x86/iopl.c @@ -85,48 +85,88 @@ static void expect_gp_outb(unsigned short port) printf("[OK]\toutb to 0x%02hx failed\n", port); } -static bool try_cli(void) +#define RET_FAULTED 0 +#define RET_FAIL 1 +#define RET_EMUL 2 + +static int try_cli(void) { + unsigned long flags; + sethandler(SIGSEGV, sigsegv, SA_RESETHAND); if (sigsetjmp(jmpbuf, 1) != 0) { - return false; + return RET_FAULTED; } else { - asm volatile ("cli"); - return true; + asm volatile("cli; pushf; pop %[flags]" + : [flags] "=rm" (flags)); + + /* X86_FLAGS_IF */ + if (!(flags & (1 << 9))) + return RET_FAIL; + else + return RET_EMUL; } clearhandler(SIGSEGV); } -static bool try_sti(void) +static int try_sti(bool irqs_off) { + unsigned long flags; + sethandler(SIGSEGV, sigsegv, SA_RESETHAND); if (sigsetjmp(jmpbuf, 1) != 0) { - return false; + return RET_FAULTED; } else { - asm volatile ("sti"); - return true; + asm volatile("sti; pushf; pop %[flags]" + : [flags] "=rm" (flags)); + + /* X86_FLAGS_IF */ + if (irqs_off && (flags & (1 << 9))) + return RET_FAIL; + else + return RET_EMUL; } clearhandler(SIGSEGV); } -static void expect_gp_sti(void) +static void expect_gp_sti(bool irqs_off) { - if (try_sti()) { + int ret = try_sti(irqs_off); + + switch (ret) { + case RET_FAULTED: + printf("[OK]\tSTI faulted\n"); + break; + case RET_EMUL: + printf("[OK]\tSTI NOPped\n"); + break; + default: printf("[FAIL]\tSTI worked\n"); nerrs++; - } else { - printf("[OK]\tSTI faulted\n"); } } -static void expect_gp_cli(void) +/* + * Returns whether it managed to disable interrupts. + */ +static bool test_cli(void) { - if (try_cli()) { + int ret = try_cli(); + + switch (ret) { + case RET_FAULTED: + printf("[OK]\tCLI faulted\n"); + break; + case RET_EMUL: + printf("[OK]\tCLI NOPped\n"); + break; + default: printf("[FAIL]\tCLI worked\n"); nerrs++; - } else { - printf("[OK]\tCLI faulted\n"); + return true; } + + return false; } int main(void) @@ -152,8 +192,7 @@ int main(void) } /* Make sure that CLI/STI are blocked even with IOPL level 3 */ - expect_gp_cli(); - expect_gp_sti(); + expect_gp_sti(test_cli()); expect_ok_outb(0x80); /* Establish an I/O bitmap to test the restore */ @@ -204,8 +243,7 @@ int main(void) printf("[RUN]\tparent: write to 0x80 (should fail)\n"); expect_gp_outb(0x80); - expect_gp_cli(); - expect_gp_sti(); + expect_gp_sti(test_cli()); /* Test the capability checks. */ printf("\tiopl(3)\n"); diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c new file mode 100644 index 000000000000..215b8150b7cc --- /dev/null +++ b/tools/testing/selftests/x86/lam.c @@ -0,0 +1,1241 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/syscall.h> +#include <time.h> +#include <signal.h> +#include <setjmp.h> +#include <sys/mman.h> +#include <sys/utsname.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <inttypes.h> +#include <sched.h> + +#include <sys/uio.h> +#include <linux/io_uring.h> +#include "../kselftest.h" + +#ifndef __x86_64__ +# error This test is 64-bit only +#endif + +/* LAM modes, these definitions were copied from kernel code */ +#define LAM_NONE 0 +#define LAM_U57_BITS 6 + +#define LAM_U57_MASK (0x3fULL << 57) +/* arch prctl for LAM */ +#define ARCH_GET_UNTAG_MASK 0x4001 +#define ARCH_ENABLE_TAGGED_ADDR 0x4002 +#define ARCH_GET_MAX_TAG_BITS 0x4003 +#define ARCH_FORCE_TAGGED_SVA 0x4004 + +/* Specified test function bits */ +#define FUNC_MALLOC 0x1 +#define FUNC_BITS 0x2 +#define FUNC_MMAP 0x4 +#define FUNC_SYSCALL 0x8 +#define FUNC_URING 0x10 +#define FUNC_INHERITE 0x20 +#define FUNC_PASID 0x40 + +#define TEST_MASK 0x7f + +#define LOW_ADDR (0x1UL << 30) +#define HIGH_ADDR (0x3UL << 48) + +#define MALLOC_LEN 32 + +#define PAGE_SIZE (4 << 10) + +#define STACK_SIZE 65536 + +#define barrier() ({ \ + __asm__ __volatile__("" : : : "memory"); \ +}) + +#define URING_QUEUE_SZ 1 +#define URING_BLOCK_SZ 2048 + +/* Pasid test define */ +#define LAM_CMD_BIT 0x1 +#define PAS_CMD_BIT 0x2 +#define SVA_CMD_BIT 0x4 + +#define PAS_CMD(cmd1, cmd2, cmd3) (((cmd3) << 8) | ((cmd2) << 4) | ((cmd1) << 0)) + +struct testcases { + unsigned int later; + int expected; /* 2: SIGSEGV Error; 1: other errors */ + unsigned long lam; + uint64_t addr; + uint64_t cmd; + int (*test_func)(struct testcases *test); + const char *msg; +}; + +/* Used by CQ of uring, source file handler and file's size */ +struct file_io { + int file_fd; + off_t file_sz; + struct iovec iovecs[]; +}; + +struct io_uring_queue { + unsigned int *head; + unsigned int *tail; + unsigned int *ring_mask; + unsigned int *ring_entries; + unsigned int *flags; + unsigned int *array; + union { + struct io_uring_cqe *cqes; + struct io_uring_sqe *sqes; + } queue; + size_t ring_sz; +}; + +struct io_ring { + int ring_fd; + struct io_uring_queue sq_ring; + struct io_uring_queue cq_ring; +}; + +int tests_cnt; +jmp_buf segv_env; + +static void segv_handler(int sig) +{ + ksft_print_msg("Get segmentation fault(%d).", sig); + + siglongjmp(segv_env, 1); +} + +static inline int cpu_has_lam(void) +{ + unsigned int cpuinfo[4]; + + __cpuid_count(0x7, 1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]); + + return (cpuinfo[0] & (1 << 26)); +} + +/* Check 5-level page table feature in CPUID.(EAX=07H, ECX=00H):ECX.[bit 16] */ +static inline int cpu_has_la57(void) +{ + unsigned int cpuinfo[4]; + + __cpuid_count(0x7, 0, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]); + + return (cpuinfo[2] & (1 << 16)); +} + +/* + * Set tagged address and read back untag mask. + * check if the untagged mask is expected. + * + * @return: + * 0: Set LAM mode successfully + * others: failed to set LAM + */ +static int set_lam(unsigned long lam) +{ + int ret = 0; + uint64_t ptr = 0; + + if (lam != LAM_U57_BITS && lam != LAM_NONE) + return -1; + + /* Skip check return */ + syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, lam); + + /* Get untagged mask */ + syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr); + + /* Check mask returned is expected */ + if (lam == LAM_U57_BITS) + ret = (ptr != ~(LAM_U57_MASK)); + else if (lam == LAM_NONE) + ret = (ptr != -1ULL); + + return ret; +} + +static unsigned long get_default_tag_bits(void) +{ + pid_t pid; + int lam = LAM_NONE; + int ret = 0; + + pid = fork(); + if (pid < 0) { + perror("Fork failed."); + } else if (pid == 0) { + /* Set LAM mode in child process */ + if (set_lam(LAM_U57_BITS) == 0) + lam = LAM_U57_BITS; + else + lam = LAM_NONE; + exit(lam); + } else { + wait(&ret); + lam = WEXITSTATUS(ret); + } + + return lam; +} + +/* + * Set tagged address and read back untag mask. + * check if the untag mask is expected. + */ +static int get_lam(void) +{ + uint64_t ptr = 0; + int ret = -1; + /* Get untagged mask */ + if (syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr) == -1) + return -1; + + /* Check mask returned is expected */ + if (ptr == ~(LAM_U57_MASK)) + ret = LAM_U57_BITS; + else if (ptr == -1ULL) + ret = LAM_NONE; + + + return ret; +} + +/* According to LAM mode, set metadata in high bits */ +static uint64_t set_metadata(uint64_t src, unsigned long lam) +{ + uint64_t metadata; + + srand(time(NULL)); + + switch (lam) { + case LAM_U57_BITS: /* Set metadata in bits 62:57 */ + /* Get a random non-zero value as metadata */ + metadata = (rand() % ((1UL << LAM_U57_BITS) - 1) + 1) << 57; + metadata |= (src & ~(LAM_U57_MASK)); + break; + default: + metadata = src; + break; + } + + return metadata; +} + +/* + * Set metadata in user pointer, compare new pointer with original pointer. + * both pointers should point to the same address. + * + * @return: + * 0: value on the pointer with metadate and value on original are same + * 1: not same. + */ +static int handle_lam_test(void *src, unsigned int lam) +{ + char *ptr; + + strcpy((char *)src, "USER POINTER"); + + ptr = (char *)set_metadata((uint64_t)src, lam); + if (src == ptr) + return 0; + + /* Copy a string into the pointer with metadata */ + strcpy((char *)ptr, "METADATA POINTER"); + + return (!!strcmp((char *)src, (char *)ptr)); +} + + +int handle_max_bits(struct testcases *test) +{ + unsigned long exp_bits = get_default_tag_bits(); + unsigned long bits = 0; + + if (exp_bits != LAM_NONE) + exp_bits = LAM_U57_BITS; + + /* Get LAM max tag bits */ + if (syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits) == -1) + return 1; + + return (exp_bits != bits); +} + +/* + * Test lam feature through dereference pointer get from malloc. + * @return 0: Pass test. 1: Get failure during test 2: Get SIGSEGV + */ +static int handle_malloc(struct testcases *test) +{ + char *ptr = NULL; + int ret = 0; + + if (test->later == 0 && test->lam != 0) + if (set_lam(test->lam) == -1) + return 1; + + ptr = (char *)malloc(MALLOC_LEN); + if (ptr == NULL) { + perror("malloc() failure\n"); + return 1; + } + + /* Set signal handler */ + if (sigsetjmp(segv_env, 1) == 0) { + signal(SIGSEGV, segv_handler); + ret = handle_lam_test(ptr, test->lam); + } else { + ret = 2; + } + + if (test->later != 0 && test->lam != 0) + if (set_lam(test->lam) == -1 && ret == 0) + ret = 1; + + free(ptr); + + return ret; +} + +static int handle_mmap(struct testcases *test) +{ + void *ptr; + unsigned int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED; + int ret = 0; + + if (test->later == 0 && test->lam != 0) + if (set_lam(test->lam) != 0) + return 1; + + ptr = mmap((void *)test->addr, PAGE_SIZE, PROT_READ | PROT_WRITE, + flags, -1, 0); + if (ptr == MAP_FAILED) { + if (test->addr == HIGH_ADDR) + if (!cpu_has_la57()) + return 3; /* unsupport LA57 */ + return 1; + } + + if (test->later != 0 && test->lam != 0) + if (set_lam(test->lam) != 0) + ret = 1; + + if (ret == 0) { + if (sigsetjmp(segv_env, 1) == 0) { + signal(SIGSEGV, segv_handler); + ret = handle_lam_test(ptr, test->lam); + } else { + ret = 2; + } + } + + munmap(ptr, PAGE_SIZE); + return ret; +} + +static int handle_syscall(struct testcases *test) +{ + struct utsname unme, *pu; + int ret = 0; + + if (test->later == 0 && test->lam != 0) + if (set_lam(test->lam) != 0) + return 1; + + if (sigsetjmp(segv_env, 1) == 0) { + signal(SIGSEGV, segv_handler); + pu = (struct utsname *)set_metadata((uint64_t)&unme, test->lam); + ret = uname(pu); + if (ret < 0) + ret = 1; + } else { + ret = 2; + } + + if (test->later != 0 && test->lam != 0) + if (set_lam(test->lam) != -1 && ret == 0) + ret = 1; + + return ret; +} + +int sys_uring_setup(unsigned int entries, struct io_uring_params *p) +{ + return (int)syscall(__NR_io_uring_setup, entries, p); +} + +int sys_uring_enter(int fd, unsigned int to, unsigned int min, unsigned int flags) +{ + return (int)syscall(__NR_io_uring_enter, fd, to, min, flags, NULL, 0); +} + +/* Init submission queue and completion queue */ +int mmap_io_uring(struct io_uring_params p, struct io_ring *s) +{ + struct io_uring_queue *sring = &s->sq_ring; + struct io_uring_queue *cring = &s->cq_ring; + + sring->ring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned int); + cring->ring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe); + + if (p.features & IORING_FEAT_SINGLE_MMAP) { + if (cring->ring_sz > sring->ring_sz) + sring->ring_sz = cring->ring_sz; + + cring->ring_sz = sring->ring_sz; + } + + void *sq_ptr = mmap(0, sring->ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, s->ring_fd, + IORING_OFF_SQ_RING); + + if (sq_ptr == MAP_FAILED) { + perror("sub-queue!"); + return 1; + } + + void *cq_ptr = sq_ptr; + + if (!(p.features & IORING_FEAT_SINGLE_MMAP)) { + cq_ptr = mmap(0, cring->ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, s->ring_fd, + IORING_OFF_CQ_RING); + if (cq_ptr == MAP_FAILED) { + perror("cpl-queue!"); + munmap(sq_ptr, sring->ring_sz); + return 1; + } + } + + sring->head = sq_ptr + p.sq_off.head; + sring->tail = sq_ptr + p.sq_off.tail; + sring->ring_mask = sq_ptr + p.sq_off.ring_mask; + sring->ring_entries = sq_ptr + p.sq_off.ring_entries; + sring->flags = sq_ptr + p.sq_off.flags; + sring->array = sq_ptr + p.sq_off.array; + + /* Map a queue as mem map */ + s->sq_ring.queue.sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, + s->ring_fd, IORING_OFF_SQES); + if (s->sq_ring.queue.sqes == MAP_FAILED) { + munmap(sq_ptr, sring->ring_sz); + if (sq_ptr != cq_ptr) { + ksft_print_msg("failed to mmap uring queue!"); + munmap(cq_ptr, cring->ring_sz); + return 1; + } + } + + cring->head = cq_ptr + p.cq_off.head; + cring->tail = cq_ptr + p.cq_off.tail; + cring->ring_mask = cq_ptr + p.cq_off.ring_mask; + cring->ring_entries = cq_ptr + p.cq_off.ring_entries; + cring->queue.cqes = cq_ptr + p.cq_off.cqes; + + return 0; +} + +/* Init io_uring queues */ +int setup_io_uring(struct io_ring *s) +{ + struct io_uring_params para; + + memset(¶, 0, sizeof(para)); + s->ring_fd = sys_uring_setup(URING_QUEUE_SZ, ¶); + if (s->ring_fd < 0) + return 1; + + return mmap_io_uring(para, s); +} + +/* + * Get data from completion queue. the data buffer saved the file data + * return 0: success; others: error; + */ +int handle_uring_cq(struct io_ring *s) +{ + struct file_io *fi = NULL; + struct io_uring_queue *cring = &s->cq_ring; + struct io_uring_cqe *cqe; + unsigned int head; + off_t len = 0; + + head = *cring->head; + + do { + barrier(); + if (head == *cring->tail) + break; + /* Get the entry */ + cqe = &cring->queue.cqes[head & *s->cq_ring.ring_mask]; + fi = (struct file_io *)cqe->user_data; + if (cqe->res < 0) + break; + + int blocks = (int)(fi->file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ; + + for (int i = 0; i < blocks; i++) + len += fi->iovecs[i].iov_len; + + head++; + } while (1); + + *cring->head = head; + barrier(); + + return (len != fi->file_sz); +} + +/* + * Submit squeue. specify via IORING_OP_READV. + * the buffer need to be set metadata according to LAM mode + */ +int handle_uring_sq(struct io_ring *ring, struct file_io *fi, unsigned long lam) +{ + int file_fd = fi->file_fd; + struct io_uring_queue *sring = &ring->sq_ring; + unsigned int index = 0, cur_block = 0, tail = 0, next_tail = 0; + struct io_uring_sqe *sqe; + + off_t remain = fi->file_sz; + int blocks = (int)(remain + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ; + + while (remain) { + off_t bytes = remain; + void *buf; + + if (bytes > URING_BLOCK_SZ) + bytes = URING_BLOCK_SZ; + + fi->iovecs[cur_block].iov_len = bytes; + + if (posix_memalign(&buf, URING_BLOCK_SZ, URING_BLOCK_SZ)) + return 1; + + fi->iovecs[cur_block].iov_base = (void *)set_metadata((uint64_t)buf, lam); + remain -= bytes; + cur_block++; + } + + next_tail = *sring->tail; + tail = next_tail; + next_tail++; + + barrier(); + + index = tail & *ring->sq_ring.ring_mask; + + sqe = &ring->sq_ring.queue.sqes[index]; + sqe->fd = file_fd; + sqe->flags = 0; + sqe->opcode = IORING_OP_READV; + sqe->addr = (unsigned long)fi->iovecs; + sqe->len = blocks; + sqe->off = 0; + sqe->user_data = (uint64_t)fi; + + sring->array[index] = index; + tail = next_tail; + + if (*sring->tail != tail) { + *sring->tail = tail; + barrier(); + } + + if (sys_uring_enter(ring->ring_fd, 1, 1, IORING_ENTER_GETEVENTS) < 0) + return 1; + + return 0; +} + +/* + * Test LAM in async I/O and io_uring, read current binery through io_uring + * Set metadata in pointers to iovecs buffer. + */ +int do_uring(unsigned long lam) +{ + struct io_ring *ring; + struct file_io *fi; + struct stat st; + int ret = 1; + char path[PATH_MAX] = {0}; + + /* get current process path */ + if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0) + return 1; + + int file_fd = open(path, O_RDONLY); + + if (file_fd < 0) + return 1; + + if (fstat(file_fd, &st) < 0) + return 1; + + off_t file_sz = st.st_size; + + int blocks = (int)(file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ; + + fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks); + if (!fi) + return 1; + + fi->file_sz = file_sz; + fi->file_fd = file_fd; + + ring = malloc(sizeof(*ring)); + if (!ring) + return 1; + + memset(ring, 0, sizeof(struct io_ring)); + + if (setup_io_uring(ring)) + goto out; + + if (handle_uring_sq(ring, fi, lam)) + goto out; + + ret = handle_uring_cq(ring); + +out: + free(ring); + + for (int i = 0; i < blocks; i++) { + if (fi->iovecs[i].iov_base) { + uint64_t addr = ((uint64_t)fi->iovecs[i].iov_base); + + switch (lam) { + case LAM_U57_BITS: /* Clear bits 62:57 */ + addr = (addr & ~(LAM_U57_MASK)); + break; + } + free((void *)addr); + fi->iovecs[i].iov_base = NULL; + } + } + + free(fi); + + return ret; +} + +int handle_uring(struct testcases *test) +{ + int ret = 0; + + if (test->later == 0 && test->lam != 0) + if (set_lam(test->lam) != 0) + return 1; + + if (sigsetjmp(segv_env, 1) == 0) { + signal(SIGSEGV, segv_handler); + ret = do_uring(test->lam); + } else { + ret = 2; + } + + return ret; +} + +static int fork_test(struct testcases *test) +{ + int ret, child_ret; + pid_t pid; + + pid = fork(); + if (pid < 0) { + perror("Fork failed."); + ret = 1; + } else if (pid == 0) { + ret = test->test_func(test); + exit(ret); + } else { + wait(&child_ret); + ret = WEXITSTATUS(child_ret); + } + + return ret; +} + +static int handle_execve(struct testcases *test) +{ + int ret, child_ret; + int lam = test->lam; + pid_t pid; + + pid = fork(); + if (pid < 0) { + perror("Fork failed."); + ret = 1; + } else if (pid == 0) { + char path[PATH_MAX] = {0}; + + /* Set LAM mode in parent process */ + if (set_lam(lam) != 0) + return 1; + + /* Get current binary's path and the binary was run by execve */ + if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0) + exit(-1); + + /* run binary to get LAM mode and return to parent process */ + if (execlp(path, path, "-t 0x0", NULL) < 0) { + perror("error on exec"); + exit(-1); + } + } else { + wait(&child_ret); + ret = WEXITSTATUS(child_ret); + if (ret != LAM_NONE) + return 1; + } + + return 0; +} + +static int handle_inheritance(struct testcases *test) +{ + int ret, child_ret; + int lam = test->lam; + pid_t pid; + + /* Set LAM mode in parent process */ + if (set_lam(lam) != 0) + return 1; + + pid = fork(); + if (pid < 0) { + perror("Fork failed."); + return 1; + } else if (pid == 0) { + /* Set LAM mode in parent process */ + int child_lam = get_lam(); + + exit(child_lam); + } else { + wait(&child_ret); + ret = WEXITSTATUS(child_ret); + + if (lam != ret) + return 1; + } + + return 0; +} + +static int thread_fn_get_lam(void *arg) +{ + return get_lam(); +} + +static int thread_fn_set_lam(void *arg) +{ + struct testcases *test = arg; + + return set_lam(test->lam); +} + +static int handle_thread(struct testcases *test) +{ + char stack[STACK_SIZE]; + int ret, child_ret; + int lam = 0; + pid_t pid; + + /* Set LAM mode in parent process */ + if (!test->later) { + lam = test->lam; + if (set_lam(lam) != 0) + return 1; + } + + pid = clone(thread_fn_get_lam, stack + STACK_SIZE, + SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, NULL); + if (pid < 0) { + perror("Clone failed."); + return 1; + } + + waitpid(pid, &child_ret, 0); + ret = WEXITSTATUS(child_ret); + + if (lam != ret) + return 1; + + if (test->later) { + if (set_lam(test->lam) != 0) + return 1; + } + + return 0; +} + +static int handle_thread_enable(struct testcases *test) +{ + char stack[STACK_SIZE]; + int ret, child_ret; + int lam = test->lam; + pid_t pid; + + pid = clone(thread_fn_set_lam, stack + STACK_SIZE, + SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, test); + if (pid < 0) { + perror("Clone failed."); + return 1; + } + + waitpid(pid, &child_ret, 0); + ret = WEXITSTATUS(child_ret); + + if (lam != ret) + return 1; + + return 0; +} +static void run_test(struct testcases *test, int count) +{ + int i, ret = 0; + + for (i = 0; i < count; i++) { + struct testcases *t = test + i; + + /* fork a process to run test case */ + tests_cnt++; + ret = fork_test(t); + + /* return 3 is not support LA57, the case should be skipped */ + if (ret == 3) { + ksft_test_result_skip("%s", t->msg); + continue; + } + + if (ret != 0) + ret = (t->expected == ret); + else + ret = !(t->expected); + + ksft_test_result(ret, "%s", t->msg); + } +} + +static struct testcases uring_cases[] = { + { + .later = 0, + .lam = LAM_U57_BITS, + .test_func = handle_uring, + .msg = "URING: LAM_U57. Dereferencing pointer with metadata\n", + }, + { + .later = 1, + .expected = 1, + .lam = LAM_U57_BITS, + .test_func = handle_uring, + .msg = "URING:[Negative] Disable LAM. Dereferencing pointer with metadata.\n", + }, +}; + +static struct testcases malloc_cases[] = { + { + .later = 0, + .lam = LAM_U57_BITS, + .test_func = handle_malloc, + .msg = "MALLOC: LAM_U57. Dereferencing pointer with metadata\n", + }, + { + .later = 1, + .expected = 2, + .lam = LAM_U57_BITS, + .test_func = handle_malloc, + .msg = "MALLOC:[Negative] Disable LAM. Dereferencing pointer with metadata.\n", + }, +}; + +static struct testcases bits_cases[] = { + { + .test_func = handle_max_bits, + .msg = "BITS: Check default tag bits\n", + }, +}; + +static struct testcases syscall_cases[] = { + { + .later = 0, + .lam = LAM_U57_BITS, + .test_func = handle_syscall, + .msg = "SYSCALL: LAM_U57. syscall with metadata\n", + }, + { + .later = 1, + .expected = 1, + .lam = LAM_U57_BITS, + .test_func = handle_syscall, + .msg = "SYSCALL:[Negative] Disable LAM. Dereferencing pointer with metadata.\n", + }, +}; + +static struct testcases mmap_cases[] = { + { + .later = 1, + .expected = 0, + .lam = LAM_U57_BITS, + .addr = HIGH_ADDR, + .test_func = handle_mmap, + .msg = "MMAP: First mmap high address, then set LAM_U57.\n", + }, + { + .later = 0, + .expected = 0, + .lam = LAM_U57_BITS, + .addr = HIGH_ADDR, + .test_func = handle_mmap, + .msg = "MMAP: First LAM_U57, then High address.\n", + }, + { + .later = 0, + .expected = 0, + .lam = LAM_U57_BITS, + .addr = LOW_ADDR, + .test_func = handle_mmap, + .msg = "MMAP: First LAM_U57, then Low address.\n", + }, +}; + +static struct testcases inheritance_cases[] = { + { + .expected = 0, + .lam = LAM_U57_BITS, + .test_func = handle_inheritance, + .msg = "FORK: LAM_U57, child process should get LAM mode same as parent\n", + }, + { + .expected = 0, + .lam = LAM_U57_BITS, + .test_func = handle_thread, + .msg = "THREAD: LAM_U57, child thread should get LAM mode same as parent\n", + }, + { + .expected = 1, + .lam = LAM_U57_BITS, + .test_func = handle_thread_enable, + .msg = "THREAD: [NEGATIVE] Enable LAM in child.\n", + }, + { + .expected = 1, + .later = 1, + .lam = LAM_U57_BITS, + .test_func = handle_thread, + .msg = "THREAD: [NEGATIVE] Enable LAM in parent after thread created.\n", + }, + { + .expected = 0, + .lam = LAM_U57_BITS, + .test_func = handle_execve, + .msg = "EXECVE: LAM_U57, child process should get disabled LAM mode\n", + }, +}; + +static void cmd_help(void) +{ + printf("usage: lam [-h] [-t test list]\n"); + printf("\t-t test list: run tests specified in the test list, default:0x%x\n", TEST_MASK); + printf("\t\t0x1:malloc; 0x2:max_bits; 0x4:mmap; 0x8:syscall; 0x10:io_uring; 0x20:inherit;\n"); + printf("\t-h: help\n"); +} + +/* Check for file existence */ +uint8_t file_Exists(const char *fileName) +{ + struct stat buffer; + + uint8_t ret = (stat(fileName, &buffer) == 0); + + return ret; +} + +/* Sysfs idxd files */ +const char *dsa_configs[] = { + "echo 1 > /sys/bus/dsa/devices/dsa0/wq0.1/group_id", + "echo shared > /sys/bus/dsa/devices/dsa0/wq0.1/mode", + "echo 10 > /sys/bus/dsa/devices/dsa0/wq0.1/priority", + "echo 16 > /sys/bus/dsa/devices/dsa0/wq0.1/size", + "echo 15 > /sys/bus/dsa/devices/dsa0/wq0.1/threshold", + "echo user > /sys/bus/dsa/devices/dsa0/wq0.1/type", + "echo MyApp1 > /sys/bus/dsa/devices/dsa0/wq0.1/name", + "echo 1 > /sys/bus/dsa/devices/dsa0/engine0.1/group_id", + "echo dsa0 > /sys/bus/dsa/drivers/idxd/bind", + /* bind files and devices, generated a device file in /dev */ + "echo wq0.1 > /sys/bus/dsa/drivers/user/bind", +}; + +/* DSA device file */ +const char *dsaDeviceFile = "/dev/dsa/wq0.1"; +/* file for io*/ +const char *dsaPasidEnable = "/sys/bus/dsa/devices/dsa0/pasid_enabled"; + +/* + * DSA depends on kernel cmdline "intel_iommu=on,sm_on" + * return pasid_enabled (0: disable 1:enable) + */ +int Check_DSA_Kernel_Setting(void) +{ + char command[256] = ""; + char buf[256] = ""; + char *ptr; + int rv = -1; + + snprintf(command, sizeof(command) - 1, "cat %s", dsaPasidEnable); + + FILE *cmd = popen(command, "r"); + + if (cmd) { + while (fgets(buf, sizeof(buf) - 1, cmd) != NULL); + + pclose(cmd); + rv = strtol(buf, &ptr, 16); + } + + return rv; +} + +/* + * Config DSA's sysfs files as shared DSA's WQ. + * Generated a device file /dev/dsa/wq0.1 + * Return: 0 OK; 1 Failed; 3 Skip(SVA disabled). + */ +int Dsa_Init_Sysfs(void) +{ + uint len = ARRAY_SIZE(dsa_configs); + const char **p = dsa_configs; + + if (file_Exists(dsaDeviceFile) == 1) + return 0; + + /* check the idxd driver */ + if (file_Exists(dsaPasidEnable) != 1) { + printf("Please make sure idxd driver was loaded\n"); + return 3; + } + + /* Check SVA feature */ + if (Check_DSA_Kernel_Setting() != 1) { + printf("Please enable SVA.(Add intel_iommu=on,sm_on in kernel cmdline)\n"); + return 3; + } + + /* Check the idxd device file on /dev/dsa/ */ + for (int i = 0; i < len; i++) { + if (system(p[i])) + return 1; + } + + /* After config, /dev/dsa/wq0.1 should be generated */ + return (file_Exists(dsaDeviceFile) != 1); +} + +/* + * Open DSA device file, triger API: iommu_sva_alloc_pasid + */ +void *allocate_dsa_pasid(void) +{ + int fd; + void *wq; + + fd = open(dsaDeviceFile, O_RDWR); + if (fd < 0) { + perror("open"); + return MAP_FAILED; + } + + wq = mmap(NULL, 0x1000, PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + if (wq == MAP_FAILED) + perror("mmap"); + + return wq; +} + +int set_force_svm(void) +{ + int ret = 0; + + ret = syscall(SYS_arch_prctl, ARCH_FORCE_TAGGED_SVA); + + return ret; +} + +int handle_pasid(struct testcases *test) +{ + uint tmp = test->cmd; + uint runed = 0x0; + int ret = 0; + void *wq = NULL; + + ret = Dsa_Init_Sysfs(); + if (ret != 0) + return ret; + + for (int i = 0; i < 3; i++) { + int err = 0; + + if (tmp & 0x1) { + /* run set lam mode*/ + if ((runed & 0x1) == 0) { + err = set_lam(LAM_U57_BITS); + runed = runed | 0x1; + } else + err = 1; + } else if (tmp & 0x4) { + /* run force svm */ + if ((runed & 0x4) == 0) { + err = set_force_svm(); + runed = runed | 0x4; + } else + err = 1; + } else if (tmp & 0x2) { + /* run allocate pasid */ + if ((runed & 0x2) == 0) { + runed = runed | 0x2; + wq = allocate_dsa_pasid(); + if (wq == MAP_FAILED) + err = 1; + } else + err = 1; + } + + ret = ret + err; + if (ret > 0) + break; + + tmp = tmp >> 4; + } + + if (wq != MAP_FAILED && wq != NULL) + if (munmap(wq, 0x1000)) + printf("munmap failed %d\n", errno); + + if (runed != 0x7) + ret = 1; + + return (ret != 0); +} + +/* + * Pasid test depends on idxd and SVA, kernel should enable iommu and sm. + * command line(intel_iommu=on,sm_on) + */ +static struct testcases pasid_cases[] = { + { + .expected = 1, + .cmd = PAS_CMD(LAM_CMD_BIT, PAS_CMD_BIT, SVA_CMD_BIT), + .test_func = handle_pasid, + .msg = "PASID: [Negative] Execute LAM, PASID, SVA in sequence\n", + }, + { + .expected = 0, + .cmd = PAS_CMD(LAM_CMD_BIT, SVA_CMD_BIT, PAS_CMD_BIT), + .test_func = handle_pasid, + .msg = "PASID: Execute LAM, SVA, PASID in sequence\n", + }, + { + .expected = 1, + .cmd = PAS_CMD(PAS_CMD_BIT, LAM_CMD_BIT, SVA_CMD_BIT), + .test_func = handle_pasid, + .msg = "PASID: [Negative] Execute PASID, LAM, SVA in sequence\n", + }, + { + .expected = 0, + .cmd = PAS_CMD(PAS_CMD_BIT, SVA_CMD_BIT, LAM_CMD_BIT), + .test_func = handle_pasid, + .msg = "PASID: Execute PASID, SVA, LAM in sequence\n", + }, + { + .expected = 0, + .cmd = PAS_CMD(SVA_CMD_BIT, LAM_CMD_BIT, PAS_CMD_BIT), + .test_func = handle_pasid, + .msg = "PASID: Execute SVA, LAM, PASID in sequence\n", + }, + { + .expected = 0, + .cmd = PAS_CMD(SVA_CMD_BIT, PAS_CMD_BIT, LAM_CMD_BIT), + .test_func = handle_pasid, + .msg = "PASID: Execute SVA, PASID, LAM in sequence\n", + }, +}; + +int main(int argc, char **argv) +{ + int c = 0; + unsigned int tests = TEST_MASK; + + tests_cnt = 0; + + if (!cpu_has_lam()) { + ksft_print_msg("Unsupported LAM feature!\n"); + return -1; + } + + while ((c = getopt(argc, argv, "ht:")) != -1) { + switch (c) { + case 't': + tests = strtoul(optarg, NULL, 16); + if (tests && !(tests & TEST_MASK)) { + ksft_print_msg("Invalid argument!\n"); + return -1; + } + break; + case 'h': + cmd_help(); + return 0; + default: + ksft_print_msg("Invalid argument\n"); + return -1; + } + } + + /* + * When tests is 0, it is not a real test case; + * the option used by test case(execve) to check the lam mode in + * process generated by execve, the process read back lam mode and + * check with lam mode in parent process. + */ + if (!tests) + return (get_lam()); + + /* Run test cases */ + if (tests & FUNC_MALLOC) + run_test(malloc_cases, ARRAY_SIZE(malloc_cases)); + + if (tests & FUNC_BITS) + run_test(bits_cases, ARRAY_SIZE(bits_cases)); + + if (tests & FUNC_MMAP) + run_test(mmap_cases, ARRAY_SIZE(mmap_cases)); + + if (tests & FUNC_SYSCALL) + run_test(syscall_cases, ARRAY_SIZE(syscall_cases)); + + if (tests & FUNC_URING) + run_test(uring_cases, ARRAY_SIZE(uring_cases)); + + if (tests & FUNC_INHERITE) + run_test(inheritance_cases, ARRAY_SIZE(inheritance_cases)); + + if (tests & FUNC_PASID) + run_test(pasid_cases, ARRAY_SIZE(pasid_cases)); + + ksft_set_plan(tests_cnt); + + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c index 1aef72df20a1..3a29346e1452 100644 --- a/tools/testing/selftests/x86/ldt_gdt.c +++ b/tools/testing/selftests/x86/ldt_gdt.c @@ -607,7 +607,7 @@ static void do_multicpu_tests(void) failures++; asm volatile ("mov %0, %%ss" : : "rm" (orig_ss)); - }; + } ftx = 100; /* Kill the thread. */ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0); diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c index 6da0ac3f0135..cc3de6ff9fba 100644 --- a/tools/testing/selftests/x86/mov_ss_trap.c +++ b/tools/testing/selftests/x86/mov_ss_trap.c @@ -47,7 +47,6 @@ unsigned short ss; extern unsigned char breakpoint_insn[]; sigjmp_buf jmpbuf; -static unsigned char altstack_data[SIGSTKSZ]; static void enable_watchpoint(void) { @@ -250,13 +249,14 @@ int main() if (sigsetjmp(jmpbuf, 1) == 0) { printf("[RUN]\tMOV SS; SYSENTER\n"); stack_t stack = { - .ss_sp = altstack_data, + .ss_sp = malloc(sizeof(char) * SIGSTKSZ), .ss_size = SIGSTKSZ, }; if (sigaltstack(&stack, NULL) != 0) err(1, "sigaltstack"); sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK); nr = SYS_getpid; + free(stack.ss_sp); /* Clear EBP first to make sure we segfault cleanly. */ asm volatile ("xorl %%ebp, %%ebp; mov %[ss], %%ss; SYSENTER" : "+a" (nr) : [ss] "m" (ss) : "flags", "rcx" diff --git a/tools/testing/selftests/x86/nx_stack.c b/tools/testing/selftests/x86/nx_stack.c new file mode 100644 index 000000000000..ea4a4e246879 --- /dev/null +++ b/tools/testing/selftests/x86/nx_stack.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2023 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Test that userspace stack is NX. Requires linking with -Wl,-z,noexecstack + * because I don't want to bother with PT_GNU_STACK detection. + * + * Fill the stack with INT3's and then try to execute some of them: + * SIGSEGV -- good, SIGTRAP -- bad. + * + * Regular stack is completely overwritten before testing. + * Test doesn't exit SIGSEGV handler after first fault at INT3. + */ +#undef _GNU_SOURCE +#define _GNU_SOURCE +#undef NDEBUG +#include <assert.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <unistd.h> + +#define PAGE_SIZE 4096 + +/* + * This is memset(rsp, 0xcc, -1); but down. + * It will SIGSEGV when bottom of the stack is reached. + * Byte-size access is important! (see rdi tweak in the signal handler). + */ +void make_stack1(void); +asm( +".pushsection .text\n" +".globl make_stack1\n" +".align 16\n" +"make_stack1:\n" + "mov $0xcc, %al\n" +#if defined __amd64__ + "mov %rsp, %rdi\n" + "mov $-1, %rcx\n" +#elif defined __i386__ + "mov %esp, %edi\n" + "mov $-1, %ecx\n" +#else +#error +#endif + "std\n" + "rep stosb\n" + /* unreachable */ + "hlt\n" +".type make_stack1,@function\n" +".size make_stack1,.-make_stack1\n" +".popsection\n" +); + +/* + * memset(p, 0xcc, -1); + * It will SIGSEGV when top of the stack is reached. + */ +void make_stack2(uint64_t p); +asm( +".pushsection .text\n" +".globl make_stack2\n" +".align 16\n" +"make_stack2:\n" + "mov $0xcc, %al\n" +#if defined __amd64__ + "mov $-1, %rcx\n" +#elif defined __i386__ + "mov $-1, %ecx\n" +#else +#error +#endif + "cld\n" + "rep stosb\n" + /* unreachable */ + "hlt\n" +".type make_stack2,@function\n" +".size make_stack2,.-make_stack2\n" +".popsection\n" +); + +static volatile int test_state = 0; +static volatile unsigned long stack_min_addr; + +#if defined __amd64__ +#define RDI REG_RDI +#define RIP REG_RIP +#define RIP_STRING "rip" +#elif defined __i386__ +#define RDI REG_EDI +#define RIP REG_EIP +#define RIP_STRING "eip" +#else +#error +#endif + +static void sigsegv(int _, siginfo_t *__, void *uc_) +{ + /* + * Some Linux versions didn't clear DF before entering signal + * handler. make_stack1() doesn't have a chance to clear DF + * either so we clear it by hand here. + */ + asm volatile ("cld" ::: "memory"); + + ucontext_t *uc = uc_; + + if (test_state == 0) { + /* Stack is faulted and cleared from RSP to the lowest address. */ + stack_min_addr = ++uc->uc_mcontext.gregs[RDI]; + if (1) { + printf("stack min %lx\n", stack_min_addr); + } + uc->uc_mcontext.gregs[RIP] = (uintptr_t)&make_stack2; + test_state = 1; + } else if (test_state == 1) { + /* Stack has been cleared from top to bottom. */ + unsigned long stack_max_addr = uc->uc_mcontext.gregs[RDI]; + if (1) { + printf("stack max %lx\n", stack_max_addr); + } + /* Start faulting pages on stack and see what happens. */ + uc->uc_mcontext.gregs[RIP] = stack_max_addr - PAGE_SIZE; + test_state = 2; + } else if (test_state == 2) { + /* Stack page is NX -- good, test next page. */ + uc->uc_mcontext.gregs[RIP] -= PAGE_SIZE; + if (uc->uc_mcontext.gregs[RIP] == stack_min_addr) { + /* One more SIGSEGV and test ends. */ + test_state = 3; + } + } else { + printf("PASS\tAll stack pages are NX\n"); + _exit(EXIT_SUCCESS); + } +} + +static void sigtrap(int _, siginfo_t *__, void *uc_) +{ + const ucontext_t *uc = uc_; + unsigned long rip = uc->uc_mcontext.gregs[RIP]; + printf("FAIL\texecutable page on the stack: " RIP_STRING " %lx\n", rip); + _exit(EXIT_FAILURE); +} + +int main(void) +{ + { + struct sigaction act = {}; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = &sigsegv; + int rv = sigaction(SIGSEGV, &act, NULL); + assert(rv == 0); + } + { + struct sigaction act = {}; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = &sigtrap; + int rv = sigaction(SIGTRAP, &act, NULL); + assert(rv == 0); + } + { + struct rlimit rlim; + int rv = getrlimit(RLIMIT_STACK, &rlim); + assert(rv == 0); + /* Cap stack at time-honored 8 MiB value. */ + rlim.rlim_max = rlim.rlim_cur; + if (rlim.rlim_max > 8 * 1024 * 1024) { + rlim.rlim_max = 8 * 1024 * 1024; + } + rv = setrlimit(RLIMIT_STACK, &rlim); + assert(rv == 0); + } + { + /* + * We don't know now much stack SIGSEGV handler uses. + * Bump this by 1 page every time someone complains, + * or rewrite it in assembly. + */ + const size_t len = SIGSTKSZ; + void *p = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + assert(p != MAP_FAILED); + stack_t ss = {}; + ss.ss_sp = p; + ss.ss_size = len; + int rv = sigaltstack(&ss, NULL); + assert(rv == 0); + } + make_stack1(); + /* + * Unreachable, but if _this_ INT3 is ever reached, it's a bug somewhere. + * Fold it into main SIGTRAP pathway. + */ + __builtin_trap(); +} diff --git a/tools/testing/selftests/x86/raw_syscall_helper_32.S b/tools/testing/selftests/x86/raw_syscall_helper_32.S index 94410fa2b5ed..a10d36afdca0 100644 --- a/tools/testing/selftests/x86/raw_syscall_helper_32.S +++ b/tools/testing/selftests/x86/raw_syscall_helper_32.S @@ -45,3 +45,5 @@ int80_and_ret: .type int80_and_ret, @function .size int80_and_ret, .-int80_and_ret + +.section .note.GNU-stack,"",%progbits diff --git a/tools/testing/selftests/x86/sigaltstack.c b/tools/testing/selftests/x86/sigaltstack.c new file mode 100644 index 000000000000..f689af75e979 --- /dev/null +++ b/tools/testing/selftests/x86/sigaltstack.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define _GNU_SOURCE +#include <signal.h> +#include <stdio.h> +#include <stdbool.h> +#include <string.h> +#include <err.h> +#include <errno.h> +#include <limits.h> +#include <sys/mman.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <sys/resource.h> +#include <setjmp.h> + +/* sigaltstack()-enforced minimum stack */ +#define ENFORCED_MINSIGSTKSZ 2048 + +#ifndef AT_MINSIGSTKSZ +# define AT_MINSIGSTKSZ 51 +#endif + +static int nerrs; + +static bool sigalrm_expected; + +static unsigned long at_minstack_size; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static int setup_altstack(void *start, unsigned long size) +{ + stack_t ss; + + memset(&ss, 0, sizeof(ss)); + ss.ss_size = size; + ss.ss_sp = start; + + return sigaltstack(&ss, NULL); +} + +static jmp_buf jmpbuf; + +static void sigsegv(int sig, siginfo_t *info, void *ctx_void) +{ + if (sigalrm_expected) { + printf("[FAIL]\tWrong signal delivered: SIGSEGV (expected SIGALRM)."); + nerrs++; + } else { + printf("[OK]\tSIGSEGV signal delivered.\n"); + } + + siglongjmp(jmpbuf, 1); +} + +static void sigalrm(int sig, siginfo_t *info, void *ctx_void) +{ + if (!sigalrm_expected) { + printf("[FAIL]\tWrong signal delivered: SIGALRM (expected SIGSEGV)."); + nerrs++; + } else { + printf("[OK]\tSIGALRM signal delivered.\n"); + } +} + +static void test_sigaltstack(void *altstack, unsigned long size) +{ + if (setup_altstack(altstack, size)) + err(1, "sigaltstack()"); + + sigalrm_expected = (size > at_minstack_size) ? true : false; + + sethandler(SIGSEGV, sigsegv, 0); + sethandler(SIGALRM, sigalrm, SA_ONSTACK); + + if (!sigsetjmp(jmpbuf, 1)) { + printf("[RUN]\tTest an alternate signal stack of %ssufficient size.\n", + sigalrm_expected ? "" : "in"); + printf("\tRaise SIGALRM. %s is expected to be delivered.\n", + sigalrm_expected ? "It" : "SIGSEGV"); + raise(SIGALRM); + } + + clearhandler(SIGALRM); + clearhandler(SIGSEGV); +} + +int main(void) +{ + void *altstack; + + at_minstack_size = getauxval(AT_MINSIGSTKSZ); + + altstack = mmap(NULL, at_minstack_size + SIGSTKSZ, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (altstack == MAP_FAILED) + err(1, "mmap()"); + + if ((ENFORCED_MINSIGSTKSZ + 1) < at_minstack_size) + test_sigaltstack(altstack, ENFORCED_MINSIGSTKSZ + 1); + + test_sigaltstack(altstack, at_minstack_size + SIGSTKSZ); + + return nerrs == 0 ? 0 : 1; +} diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c index 57c4f67f16ef..5d7961a5f7f6 100644 --- a/tools/testing/selftests/x86/sigreturn.c +++ b/tools/testing/selftests/x86/sigreturn.c @@ -138,9 +138,6 @@ static unsigned short LDT3(int idx) return (idx << 3) | 7; } -/* Our sigaltstack scratch space. */ -static char altstack_data[SIGSTKSZ]; - static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) { @@ -771,7 +768,8 @@ int main() setup_ldt(); stack_t stack = { - .ss_sp = altstack_data, + /* Our sigaltstack scratch space. */ + .ss_sp = malloc(sizeof(char) * SIGSTKSZ), .ss_size = SIGSTKSZ, }; if (sigaltstack(&stack, NULL) != 0) @@ -872,5 +870,6 @@ int main() total_nerrs += test_nonstrict_ss(); #endif + free(stack.ss_sp); return total_nerrs ? 1 : 0; } diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c index 1063328e275c..9a30f443e928 100644 --- a/tools/testing/selftests/x86/single_step_syscall.c +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -31,6 +31,8 @@ #include <sys/ptrace.h> #include <sys/user.h> +#include "helpers.h" + static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) { @@ -55,7 +57,6 @@ static void clearhandler(int sig) static volatile sig_atomic_t sig_traps, sig_eflags; sigjmp_buf jmpbuf; -static unsigned char altstack_data[SIGSTKSZ]; #ifdef __x86_64__ # define REG_IP REG_RIP @@ -67,21 +68,6 @@ static unsigned char altstack_data[SIGSTKSZ]; # define INT80_CLOBBERS #endif -static unsigned long get_eflags(void) -{ - unsigned long eflags; - asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags)); - return eflags; -} - -static void set_eflags(unsigned long eflags) -{ - asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH - : : "rm" (eflags) : "flags"); -} - -#define X86_EFLAGS_TF (1UL << 8) - static void sigtrap(int sig, siginfo_t *info, void *ctx_void) { ucontext_t *ctx = (ucontext_t*)ctx_void; @@ -223,7 +209,7 @@ int main() unsigned long nr = SYS_getpid; printf("[RUN]\tSet TF and check SYSENTER\n"); stack_t stack = { - .ss_sp = altstack_data, + .ss_sp = malloc(sizeof(char) * SIGSTKSZ), .ss_size = SIGSTKSZ, }; if (sigaltstack(&stack, NULL) != 0) @@ -232,6 +218,7 @@ int main() SA_RESETHAND | SA_ONSTACK); sethandler(SIGILL, print_and_longjmp, SA_RESETHAND); set_eflags(get_eflags() | X86_EFLAGS_TF); + free(stack.ss_sp); /* Clear EBP first to make sure we segfault cleanly. */ asm volatile ("xorl %%ebp, %%ebp; SYSENTER" : "+a" (nr) :: "flags", "rcx" #ifdef __x86_64__ diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c index bc0ecc2e862e..461fa41a4d02 100644 --- a/tools/testing/selftests/x86/syscall_arg_fault.c +++ b/tools/testing/selftests/x86/syscall_arg_fault.c @@ -15,29 +15,7 @@ #include <setjmp.h> #include <errno.h> -#ifdef __x86_64__ -# define WIDTH "q" -#else -# define WIDTH "l" -#endif - -/* Our sigaltstack scratch space. */ -static unsigned char altstack_data[SIGSTKSZ]; - -static unsigned long get_eflags(void) -{ - unsigned long eflags; - asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags)); - return eflags; -} - -static void set_eflags(unsigned long eflags) -{ - asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH - : : "rm" (eflags) : "flags"); -} - -#define X86_EFLAGS_TF (1UL << 8) +#include "helpers.h" static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) @@ -72,6 +50,7 @@ static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void) if (ax != -EFAULT && ax != -ENOSYS) { printf("[FAIL]\tAX had the wrong value: 0x%lx\n", (unsigned long)ax); + printf("\tIP = 0x%lx\n", (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); n_errs++; } else { printf("[OK]\tSeems okay\n"); @@ -122,7 +101,8 @@ static void sigill(int sig, siginfo_t *info, void *ctx_void) int main() { stack_t stack = { - .ss_sp = altstack_data, + /* Our sigaltstack scratch space. */ + .ss_sp = malloc(sizeof(char) * SIGSTKSZ), .ss_size = SIGSTKSZ, }; if (sigaltstack(&stack, NULL) != 0) @@ -226,5 +206,31 @@ int main() } set_eflags(get_eflags() & ~X86_EFLAGS_TF); +#ifdef __x86_64__ + printf("[RUN]\tSYSENTER with TF, invalid state, and GSBASE < 0\n"); + + if (sigsetjmp(jmpbuf, 1) == 0) { + sigtrap_consecutive_syscalls = 0; + + asm volatile ("wrgsbase %%rax\n\t" + :: "a" (0xffffffffffff0000UL)); + + set_eflags(get_eflags() | X86_EFLAGS_TF); + asm volatile ( + "movl $-1, %%eax\n\t" + "movl $-1, %%ebx\n\t" + "movl $-1, %%ecx\n\t" + "movl $-1, %%edx\n\t" + "movl $-1, %%esi\n\t" + "movl $-1, %%edi\n\t" + "movl $-1, %%ebp\n\t" + "movl $-1, %%esp\n\t" + "sysenter" + : : : "memory", "flags"); + } + set_eflags(get_eflags() & ~X86_EFLAGS_TF); +#endif + + free(stack.ss_sp); return 0; } diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c index 02309a195041..a108b80dd082 100644 --- a/tools/testing/selftests/x86/syscall_nt.c +++ b/tools/testing/selftests/x86/syscall_nt.c @@ -13,29 +13,11 @@ #include <signal.h> #include <err.h> #include <sys/syscall.h> -#include <asm/processor-flags.h> -#ifdef __x86_64__ -# define WIDTH "q" -#else -# define WIDTH "l" -#endif +#include "helpers.h" static unsigned int nerrs; -static unsigned long get_eflags(void) -{ - unsigned long eflags; - asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags)); - return eflags; -} - -static void set_eflags(unsigned long eflags) -{ - asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH - : : "rm" (eflags) : "flags"); -} - static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) { @@ -59,6 +41,7 @@ static void do_it(unsigned long extraflags) set_eflags(get_eflags() | extraflags); syscall(SYS_getpid); flags = get_eflags(); + set_eflags(X86_EFLAGS_IF | X86_EFLAGS_FIXED); if ((flags & extraflags) == extraflags) { printf("[OK]\tThe syscall worked and flags are still set\n"); } else { @@ -73,6 +56,12 @@ int main(void) printf("[RUN]\tSet NT and issue a syscall\n"); do_it(X86_EFLAGS_NT); + printf("[RUN]\tSet AC and issue a syscall\n"); + do_it(X86_EFLAGS_AC); + + printf("[RUN]\tSet NT|AC and issue a syscall\n"); + do_it(X86_EFLAGS_NT | X86_EFLAGS_AC); + /* * Now try it again with TF set -- TF forces returns via IRET in all * cases except non-ptregs-using 64-bit full fast path syscalls. @@ -80,8 +69,28 @@ int main(void) sethandler(SIGTRAP, sigtrap, 0); + printf("[RUN]\tSet TF and issue a syscall\n"); + do_it(X86_EFLAGS_TF); + printf("[RUN]\tSet NT|TF and issue a syscall\n"); do_it(X86_EFLAGS_NT | X86_EFLAGS_TF); + printf("[RUN]\tSet AC|TF and issue a syscall\n"); + do_it(X86_EFLAGS_AC | X86_EFLAGS_TF); + + printf("[RUN]\tSet NT|AC|TF and issue a syscall\n"); + do_it(X86_EFLAGS_NT | X86_EFLAGS_AC | X86_EFLAGS_TF); + + /* + * Now try DF. This is evil and it's plausible that we will crash + * glibc, but glibc would have to do something rather surprising + * for this to happen. + */ + printf("[RUN]\tSet DF and issue a syscall\n"); + do_it(X86_EFLAGS_DF); + + printf("[RUN]\tSet TF|DF and issue a syscall\n"); + do_it(X86_EFLAGS_TF | X86_EFLAGS_DF); + return nerrs == 0 ? 0 : 1; } diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c index d6b09cb1aa2c..991591718bb0 100644 --- a/tools/testing/selftests/x86/syscall_numbering.c +++ b/tools/testing/selftests/x86/syscall_numbering.c @@ -1,6 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args + * syscall_numbering.c - test calling the x86-64 kernel with various + * valid and invalid system call numbers. + * * Copyright (c) 2018 Andrew Lutomirski */ @@ -11,79 +13,470 @@ #include <stdbool.h> #include <errno.h> #include <unistd.h> -#include <syscall.h> +#include <string.h> +#include <fcntl.h> +#include <limits.h> +#include <signal.h> +#include <sysexits.h> -static int nerrs; +#include <sys/ptrace.h> +#include <sys/user.h> +#include <sys/wait.h> +#include <sys/mman.h> -#define X32_BIT 0x40000000UL +#include <linux/ptrace.h> -static void check_enosys(unsigned long nr, bool *ok) +/* Common system call numbers */ +#define SYS_READ 0 +#define SYS_WRITE 1 +#define SYS_GETPID 39 +/* x64-only system call numbers */ +#define X64_IOCTL 16 +#define X64_READV 19 +#define X64_WRITEV 20 +/* x32-only system call numbers (without X32_BIT) */ +#define X32_IOCTL 514 +#define X32_READV 515 +#define X32_WRITEV 516 + +#define X32_BIT 0x40000000 + +static int nullfd = -1; /* File descriptor for /dev/null */ +static bool with_x32; /* x32 supported on this kernel? */ + +enum ptrace_pass { + PTP_NOTHING, + PTP_GETREGS, + PTP_WRITEBACK, + PTP_FUZZRET, + PTP_FUZZHIGH, + PTP_INTNUM, + PTP_DONE +}; + +static const char * const ptrace_pass_name[] = { - /* If this fails, a segfault is reasonably likely. */ - fflush(stdout); + [PTP_NOTHING] = "just stop, no data read", + [PTP_GETREGS] = "only getregs", + [PTP_WRITEBACK] = "getregs, unmodified setregs", + [PTP_FUZZRET] = "modifying the default return", + [PTP_FUZZHIGH] = "clobbering the top 32 bits", + [PTP_INTNUM] = "sign-extending the syscall number", +}; - long ret = syscall(nr, 0, 0, 0, 0, 0, 0); - if (ret == 0) { - printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr); - *ok = false; - } else if (errno != ENOSYS) { - printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno); - *ok = false; - } +/* + * Shared memory block between tracer and test + */ +struct shared { + unsigned int nerr; /* Total error count */ + unsigned int indent; /* Message indentation level */ + enum ptrace_pass ptrace_pass; + bool probing_syscall; /* In probe_syscall() */ +}; +static volatile struct shared *sh; + +static inline unsigned int offset(void) +{ + unsigned int level = sh ? sh->indent : 0; + + return 8 + level * 4; } -static void test_x32_without_x32_bit(void) +#define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \ + ## __VA_ARGS__) + +#define run(fmt, ...) msg(RUN, fmt, ## __VA_ARGS__) +#define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__) +#define ok(fmt, ...) msg(OK, fmt, ## __VA_ARGS__) + +#define fail(fmt, ...) \ + do { \ + msg(FAIL, fmt, ## __VA_ARGS__); \ + sh->nerr++; \ + } while (0) + +#define crit(fmt, ...) \ + do { \ + sh->indent = 0; \ + msg(FAIL, fmt, ## __VA_ARGS__); \ + msg(SKIP, "Unable to run test\n"); \ + exit(EX_OSERR); \ + } while (0) + +/* Sentinel for ptrace-modified return value */ +#define MODIFIED_BY_PTRACE -9999 + +/* + * Directly invokes the given syscall with nullfd as the first argument + * and the rest zero. Avoids involving glibc wrappers in case they ever + * end up intercepting some system calls for some reason, or modify + * the system call number itself. + */ +static long long probe_syscall(int msb, int lsb) { - bool ok = true; + register long long arg1 asm("rdi") = nullfd; + register long long arg2 asm("rsi") = 0; + register long long arg3 asm("rdx") = 0; + register long long arg4 asm("r10") = 0; + register long long arg5 asm("r8") = 0; + register long long arg6 asm("r9") = 0; + long long nr = ((long long)msb << 32) | (unsigned int)lsb; + long long ret; /* - * Syscalls 512-547 are "x32" syscalls. They are intended to be - * called with the x32 (0x40000000) bit set. Calling them without - * the x32 bit set is nonsense and should not work. + * We pass in an extra copy of the extended system call number + * in %rbx, so we can examine it from the ptrace handler without + * worrying about it being possibly modified. This is to test + * the validity of struct user regs.orig_rax a.k.a. + * struct pt_regs.orig_ax. */ - printf("[RUN]\tChecking syscalls 512-547\n"); - for (int i = 512; i <= 547; i++) - check_enosys(i, &ok); + sh->probing_syscall = true; + asm volatile("syscall" + : "=a" (ret) + : "a" (nr), "b" (nr), + "r" (arg1), "r" (arg2), "r" (arg3), + "r" (arg4), "r" (arg5), "r" (arg6) + : "rcx", "r11", "memory", "cc"); + sh->probing_syscall = false; + + return ret; +} + +static const char *syscall_str(int msb, int start, int end) +{ + static char buf[64]; + const char * const type = (start & X32_BIT) ? "x32" : "x64"; + int lsb = start; /* - * Check that a handful of 64-bit-only syscalls are rejected if the x32 - * bit is set. + * Improve readability by stripping the x32 bit, but round + * toward zero so we don't display -1 as -1073741825. */ - printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n"); - check_enosys(16 | X32_BIT, &ok); /* ioctl */ - check_enosys(19 | X32_BIT, &ok); /* readv */ - check_enosys(20 | X32_BIT, &ok); /* writev */ + if (lsb < 0) + lsb |= X32_BIT; + else + lsb &= ~X32_BIT; + + if (start == end) + snprintf(buf, sizeof buf, "%s syscall %d:%d", + type, msb, lsb); + else + snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d", + type, msb, lsb, lsb + (end-start)); + + return buf; +} + +static unsigned int _check_for(int msb, int start, int end, long long expect, + const char *expect_str) +{ + unsigned int err = 0; + + sh->indent++; + if (start != end) + sh->indent++; + + for (int nr = start; nr <= end; nr++) { + long long ret = probe_syscall(msb, nr); + + if (ret != expect) { + fail("%s returned %lld, but it should have returned %s\n", + syscall_str(msb, nr, nr), + ret, expect_str); + err++; + } + } + + if (start != end) + sh->indent--; + + if (err) { + if (start != end) + fail("%s had %u failure%s\n", + syscall_str(msb, start, end), + err, err == 1 ? "s" : ""); + } else { + ok("%s returned %s as expected\n", + syscall_str(msb, start, end), expect_str); + } + + sh->indent--; + + return err; +} + +#define check_for(msb,start,end,expect) \ + _check_for(msb,start,end,expect,#expect) + +static bool check_zero(int msb, int nr) +{ + return check_for(msb, nr, nr, 0); +} + +static bool check_enosys(int msb, int nr) +{ + return check_for(msb, nr, nr, -ENOSYS); +} + +/* + * Anyone diagnosing a failure will want to know whether the kernel + * supports x32. Tell them. This can also be used to conditionalize + * tests based on existence or nonexistence of x32. + */ +static bool test_x32(void) +{ + long long ret; + pid_t mypid = getpid(); + + run("Checking for x32 by calling x32 getpid()\n"); + ret = probe_syscall(0, SYS_GETPID | X32_BIT); + + sh->indent++; + if (ret == mypid) { + info("x32 is supported\n"); + with_x32 = true; + } else if (ret == -ENOSYS) { + info("x32 is not supported\n"); + with_x32 = false; + } else { + fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid); + with_x32 = false; + } + sh->indent--; + return with_x32; +} + +static void test_syscalls_common(int msb) +{ + enum ptrace_pass pass = sh->ptrace_pass; + + run("Checking some common syscalls as 64 bit\n"); + check_zero(msb, SYS_READ); + check_zero(msb, SYS_WRITE); + + run("Checking some 64-bit only syscalls as 64 bit\n"); + check_zero(msb, X64_READV); + check_zero(msb, X64_WRITEV); + + run("Checking out of range system calls\n"); + check_for(msb, -64, -2, -ENOSYS); + if (pass >= PTP_FUZZRET) + check_for(msb, -1, -1, MODIFIED_BY_PTRACE); + else + check_for(msb, -1, -1, -ENOSYS); + check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS); + check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS); + check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS); +} +static void test_syscalls_with_x32(int msb) +{ /* - * Check some syscalls with high bits set. + * Syscalls 512-547 are "x32" syscalls. They are + * intended to be called with the x32 (0x40000000) bit + * set. Calling them without the x32 bit set is + * nonsense and should not work. */ - printf("[RUN]\tChecking numbers above 2^32-1\n"); - check_enosys((1UL << 32), &ok); - check_enosys(X32_BIT | (1UL << 32), &ok); + run("Checking x32 syscalls as 64 bit\n"); + check_for(msb, 512, 547, -ENOSYS); - if (!ok) - nerrs++; - else - printf("[OK]\tThey all returned -ENOSYS\n"); + run("Checking some common syscalls as x32\n"); + check_zero(msb, SYS_READ | X32_BIT); + check_zero(msb, SYS_WRITE | X32_BIT); + + run("Checking some x32 syscalls as x32\n"); + check_zero(msb, X32_READV | X32_BIT); + check_zero(msb, X32_WRITEV | X32_BIT); + + run("Checking some 64-bit syscalls as x32\n"); + check_enosys(msb, X64_IOCTL | X32_BIT); + check_enosys(msb, X64_READV | X32_BIT); + check_enosys(msb, X64_WRITEV | X32_BIT); } -int main() +static void test_syscalls_without_x32(int msb) { + run("Checking for absence of x32 system calls\n"); + check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS); +} + +static void test_syscall_numbering(void) +{ + static const int msbs[] = { + 0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX, + INT_MIN, INT_MIN+1 + }; + + sh->indent++; + /* - * Anyone diagnosing a failure will want to know whether the kernel - * supports x32. Tell them. + * The MSB is supposed to be ignored, so we loop over a few + * to test that out. */ - printf("\tChecking for x32..."); - fflush(stdout); - if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) { - printf(" supported\n"); - } else if (errno == ENOSYS) { - printf(" not supported\n"); + for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) { + int msb = msbs[i]; + run("Checking system calls with msb = %d (0x%x)\n", + msb, msb); + + sh->indent++; + + test_syscalls_common(msb); + if (with_x32) + test_syscalls_with_x32(msb); + else + test_syscalls_without_x32(msb); + + sh->indent--; + } + + sh->indent--; +} + +static void syscall_numbering_tracee(void) +{ + enum ptrace_pass pass; + + if (ptrace(PTRACE_TRACEME, 0, 0, 0)) { + crit("Failed to request tracing\n"); + return; + } + raise(SIGSTOP); + + for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE; + sh->ptrace_pass = ++pass) { + run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]); + test_syscall_numbering(); + } +} + +static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass) +{ + struct user_regs_struct regs; + + sh->probing_syscall = false; /* Do this on entry only */ + + /* For these, don't even getregs */ + if (pass == PTP_NOTHING || pass == PTP_DONE) + return; + + ptrace(PTRACE_GETREGS, testpid, NULL, ®s); + + if (regs.orig_rax != regs.rbx) { + fail("orig_rax %#llx doesn't match syscall number %#llx\n", + (unsigned long long)regs.orig_rax, + (unsigned long long)regs.rbx); + } + + switch (pass) { + case PTP_GETREGS: + /* Just read, no writeback */ + return; + case PTP_WRITEBACK: + /* Write back the same register state verbatim */ + break; + case PTP_FUZZRET: + regs.rax = MODIFIED_BY_PTRACE; + break; + case PTP_FUZZHIGH: + regs.rax = MODIFIED_BY_PTRACE; + regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL; + break; + case PTP_INTNUM: + regs.rax = MODIFIED_BY_PTRACE; + regs.orig_rax = (int)regs.orig_rax; + break; + default: + crit("invalid ptrace_pass\n"); + break; + } + + ptrace(PTRACE_SETREGS, testpid, NULL, ®s); +} + +static void syscall_numbering_tracer(pid_t testpid) +{ + int wstatus; + + do { + pid_t wpid = waitpid(testpid, &wstatus, 0); + if (wpid < 0 && errno != EINTR) + break; + if (wpid != testpid) + continue; + if (!WIFSTOPPED(wstatus)) + break; /* Thread exited? */ + + if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP) + mess_with_syscall(testpid, sh->ptrace_pass); + } while (sh->ptrace_pass != PTP_DONE && + !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL)); + + ptrace(PTRACE_DETACH, testpid, NULL, NULL); + + /* Wait for the child process to terminate */ + while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus)) + /* wait some more */; +} + +static void test_traced_syscall_numbering(void) +{ + pid_t testpid; + + /* Launch the test thread; this thread continues as the tracer thread */ + testpid = fork(); + + if (testpid < 0) { + crit("Unable to launch tracer process\n"); + } else if (testpid == 0) { + syscall_numbering_tracee(); + _exit(0); } else { - printf(" confused\n"); + syscall_numbering_tracer(testpid); } +} - test_x32_without_x32_bit(); +int main(void) +{ + unsigned int nerr; - return nerrs ? 1 : 0; + /* + * It is quite likely to get a segfault on a failure, so make + * sure the message gets out by setting stdout to nonbuffered. + */ + setvbuf(stdout, NULL, _IONBF, 0); + + /* + * Harmless file descriptor to work on... + */ + nullfd = open("/dev/null", O_RDWR); + if (nullfd < 0) { + crit("Unable to open /dev/null: %s\n", strerror(errno)); + } + + /* + * Set up a block of shared memory... + */ + sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_SHARED, 0, 0); + if (sh == MAP_FAILED) { + crit("Unable to allocated shared memory block: %s\n", + strerror(errno)); + } + + with_x32 = test_x32(); + + run("Running tests without ptrace...\n"); + test_syscall_numbering(); + + test_traced_syscall_numbering(); + + nerr = sh->nerr; + if (!nerr) { + ok("All system calls succeeded or failed as expected\n"); + return 0; + } else { + fail("A total of %u system call%s had incorrect behavior\n", + nerr, nerr != 1 ? "s" : ""); + return 1; + } } diff --git a/tools/testing/selftests/x86/test_shadow_stack.c b/tools/testing/selftests/x86/test_shadow_stack.c new file mode 100644 index 000000000000..757e6527f67e --- /dev/null +++ b/tools/testing/selftests/x86/test_shadow_stack.c @@ -0,0 +1,884 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This program test's basic kernel shadow stack support. It enables shadow + * stack manual via the arch_prctl(), instead of relying on glibc. It's + * Makefile doesn't compile with shadow stack support, so it doesn't rely on + * any particular glibc. As a result it can't do any operations that require + * special glibc shadow stack support (longjmp(), swapcontext(), etc). Just + * stick to the basics and hope the compiler doesn't do anything strange. + */ + +#define _GNU_SOURCE + +#include <sys/syscall.h> +#include <asm/mman.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <stdbool.h> +#include <x86intrin.h> +#include <asm/prctl.h> +#include <sys/prctl.h> +#include <stdint.h> +#include <signal.h> +#include <pthread.h> +#include <sys/ioctl.h> +#include <linux/userfaultfd.h> +#include <setjmp.h> +#include <sys/ptrace.h> +#include <sys/signal.h> +#include <linux/elf.h> + +/* + * Define the ABI defines if needed, so people can run the tests + * without building the headers. + */ +#ifndef __NR_map_shadow_stack +#define __NR_map_shadow_stack 453 + +#define SHADOW_STACK_SET_TOKEN (1ULL << 0) + +#define ARCH_SHSTK_ENABLE 0x5001 +#define ARCH_SHSTK_DISABLE 0x5002 +#define ARCH_SHSTK_LOCK 0x5003 +#define ARCH_SHSTK_UNLOCK 0x5004 +#define ARCH_SHSTK_STATUS 0x5005 + +#define ARCH_SHSTK_SHSTK (1ULL << 0) +#define ARCH_SHSTK_WRSS (1ULL << 1) + +#define NT_X86_SHSTK 0x204 +#endif + +#define SS_SIZE 0x200000 +#define PAGE_SIZE 0x1000 + +#if (__GNUC__ < 8) || (__GNUC__ == 8 && __GNUC_MINOR__ < 5) +int main(int argc, char *argv[]) +{ + printf("[SKIP]\tCompiler does not support CET.\n"); + return 0; +} +#else +void write_shstk(unsigned long *addr, unsigned long val) +{ + asm volatile("wrssq %[val], (%[addr])\n" + : "=m" (addr) + : [addr] "r" (addr), [val] "r" (val)); +} + +static inline unsigned long __attribute__((always_inline)) get_ssp(void) +{ + unsigned long ret = 0; + + asm volatile("xor %0, %0; rdsspq %0" : "=r" (ret)); + return ret; +} + +/* + * For use in inline enablement of shadow stack. + * + * The program can't return from the point where shadow stack gets enabled + * because there will be no address on the shadow stack. So it can't use + * syscall() for enablement, since it is a function. + * + * Based on code from nolibc.h. Keep a copy here because this can't pull in all + * of nolibc.h. + */ +#define ARCH_PRCTL(arg1, arg2) \ +({ \ + long _ret; \ + register long _num asm("eax") = __NR_arch_prctl; \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +void *create_shstk(void *addr) +{ + return (void *)syscall(__NR_map_shadow_stack, addr, SS_SIZE, SHADOW_STACK_SET_TOKEN); +} + +void *create_normal_mem(void *addr) +{ + return mmap(addr, SS_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); +} + +void free_shstk(void *shstk) +{ + munmap(shstk, SS_SIZE); +} + +int reset_shstk(void *shstk) +{ + return madvise(shstk, SS_SIZE, MADV_DONTNEED); +} + +void try_shstk(unsigned long new_ssp) +{ + unsigned long ssp; + + printf("[INFO]\tnew_ssp = %lx, *new_ssp = %lx\n", + new_ssp, *((unsigned long *)new_ssp)); + + ssp = get_ssp(); + printf("[INFO]\tchanging ssp from %lx to %lx\n", ssp, new_ssp); + + asm volatile("rstorssp (%0)\n":: "r" (new_ssp)); + asm volatile("saveprevssp"); + printf("[INFO]\tssp is now %lx\n", get_ssp()); + + /* Switch back to original shadow stack */ + ssp -= 8; + asm volatile("rstorssp (%0)\n":: "r" (ssp)); + asm volatile("saveprevssp"); +} + +int test_shstk_pivot(void) +{ + void *shstk = create_shstk(0); + + if (shstk == MAP_FAILED) { + printf("[FAIL]\tError creating shadow stack: %d\n", errno); + return 1; + } + try_shstk((unsigned long)shstk + SS_SIZE - 8); + free_shstk(shstk); + + printf("[OK]\tShadow stack pivot\n"); + return 0; +} + +int test_shstk_faults(void) +{ + unsigned long *shstk = create_shstk(0); + + /* Read shadow stack, test if it's zero to not get read optimized out */ + if (*shstk != 0) + goto err; + + /* Wrss memory that was already read. */ + write_shstk(shstk, 1); + if (*shstk != 1) + goto err; + + /* Page out memory, so we can wrss it again. */ + if (reset_shstk((void *)shstk)) + goto err; + + write_shstk(shstk, 1); + if (*shstk != 1) + goto err; + + printf("[OK]\tShadow stack faults\n"); + return 0; + +err: + return 1; +} + +unsigned long saved_ssp; +unsigned long saved_ssp_val; +volatile bool segv_triggered; + +void __attribute__((noinline)) violate_ss(void) +{ + saved_ssp = get_ssp(); + saved_ssp_val = *(unsigned long *)saved_ssp; + + /* Corrupt shadow stack */ + printf("[INFO]\tCorrupting shadow stack\n"); + write_shstk((void *)saved_ssp, 0); +} + +void segv_handler(int signum, siginfo_t *si, void *uc) +{ + printf("[INFO]\tGenerated shadow stack violation successfully\n"); + + segv_triggered = true; + + /* Fix shadow stack */ + write_shstk((void *)saved_ssp, saved_ssp_val); +} + +int test_shstk_violation(void) +{ + struct sigaction sa = {}; + + sa.sa_sigaction = segv_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + segv_triggered = false; + + /* Make sure segv_triggered is set before violate_ss() */ + asm volatile("" : : : "memory"); + + violate_ss(); + + signal(SIGSEGV, SIG_DFL); + + printf("[OK]\tShadow stack violation test\n"); + + return !segv_triggered; +} + +/* Gup test state */ +#define MAGIC_VAL 0x12345678 +bool is_shstk_access; +void *shstk_ptr; +int fd; + +void reset_test_shstk(void *addr) +{ + if (shstk_ptr) + free_shstk(shstk_ptr); + shstk_ptr = create_shstk(addr); +} + +void test_access_fix_handler(int signum, siginfo_t *si, void *uc) +{ + printf("[INFO]\tViolation from %s\n", is_shstk_access ? "shstk access" : "normal write"); + + segv_triggered = true; + + /* Fix shadow stack */ + if (is_shstk_access) { + reset_test_shstk(shstk_ptr); + return; + } + + free_shstk(shstk_ptr); + create_normal_mem(shstk_ptr); +} + +bool test_shstk_access(void *ptr) +{ + is_shstk_access = true; + segv_triggered = false; + write_shstk(ptr, MAGIC_VAL); + + asm volatile("" : : : "memory"); + + return segv_triggered; +} + +bool test_write_access(void *ptr) +{ + is_shstk_access = false; + segv_triggered = false; + *(unsigned long *)ptr = MAGIC_VAL; + + asm volatile("" : : : "memory"); + + return segv_triggered; +} + +bool gup_write(void *ptr) +{ + unsigned long val; + + lseek(fd, (unsigned long)ptr, SEEK_SET); + if (write(fd, &val, sizeof(val)) < 0) + return 1; + + return 0; +} + +bool gup_read(void *ptr) +{ + unsigned long val; + + lseek(fd, (unsigned long)ptr, SEEK_SET); + if (read(fd, &val, sizeof(val)) < 0) + return 1; + + return 0; +} + +int test_gup(void) +{ + struct sigaction sa = {}; + int status; + pid_t pid; + + sa.sa_sigaction = test_access_fix_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + segv_triggered = false; + + fd = open("/proc/self/mem", O_RDWR); + if (fd == -1) + return 1; + + reset_test_shstk(0); + if (gup_read(shstk_ptr)) + return 1; + if (test_shstk_access(shstk_ptr)) + return 1; + printf("[INFO]\tGup read -> shstk access success\n"); + + reset_test_shstk(0); + if (gup_write(shstk_ptr)) + return 1; + if (test_shstk_access(shstk_ptr)) + return 1; + printf("[INFO]\tGup write -> shstk access success\n"); + + reset_test_shstk(0); + if (gup_read(shstk_ptr)) + return 1; + if (!test_write_access(shstk_ptr)) + return 1; + printf("[INFO]\tGup read -> write access success\n"); + + reset_test_shstk(0); + if (gup_write(shstk_ptr)) + return 1; + if (!test_write_access(shstk_ptr)) + return 1; + printf("[INFO]\tGup write -> write access success\n"); + + close(fd); + + /* COW/gup test */ + reset_test_shstk(0); + pid = fork(); + if (!pid) { + fd = open("/proc/self/mem", O_RDWR); + if (fd == -1) + exit(1); + + if (gup_write(shstk_ptr)) { + close(fd); + exit(1); + } + close(fd); + exit(0); + } + waitpid(pid, &status, 0); + if (WEXITSTATUS(status)) { + printf("[FAIL]\tWrite in child failed\n"); + return 1; + } + if (*(unsigned long *)shstk_ptr == MAGIC_VAL) { + printf("[FAIL]\tWrite in child wrote through to shared memory\n"); + return 1; + } + + printf("[INFO]\tCow gup write -> write access success\n"); + + free_shstk(shstk_ptr); + + signal(SIGSEGV, SIG_DFL); + + printf("[OK]\tShadow gup test\n"); + + return 0; +} + +int test_mprotect(void) +{ + struct sigaction sa = {}; + + sa.sa_sigaction = test_access_fix_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + segv_triggered = false; + + /* mprotect a shadow stack as read only */ + reset_test_shstk(0); + if (mprotect(shstk_ptr, SS_SIZE, PROT_READ) < 0) { + printf("[FAIL]\tmprotect(PROT_READ) failed\n"); + return 1; + } + + /* try to wrss it and fail */ + if (!test_shstk_access(shstk_ptr)) { + printf("[FAIL]\tShadow stack access to read-only memory succeeded\n"); + return 1; + } + + /* + * The shadow stack was reset above to resolve the fault, make the new one + * read-only. + */ + if (mprotect(shstk_ptr, SS_SIZE, PROT_READ) < 0) { + printf("[FAIL]\tmprotect(PROT_READ) failed\n"); + return 1; + } + + /* then back to writable */ + if (mprotect(shstk_ptr, SS_SIZE, PROT_WRITE | PROT_READ) < 0) { + printf("[FAIL]\tmprotect(PROT_WRITE) failed\n"); + return 1; + } + + /* then wrss to it and succeed */ + if (test_shstk_access(shstk_ptr)) { + printf("[FAIL]\tShadow stack access to mprotect() writable memory failed\n"); + return 1; + } + + free_shstk(shstk_ptr); + + signal(SIGSEGV, SIG_DFL); + + printf("[OK]\tmprotect() test\n"); + + return 0; +} + +char zero[4096]; + +static void *uffd_thread(void *arg) +{ + struct uffdio_copy req; + int uffd = *(int *)arg; + struct uffd_msg msg; + int ret; + + while (1) { + ret = read(uffd, &msg, sizeof(msg)); + if (ret > 0) + break; + else if (errno == EAGAIN) + continue; + return (void *)1; + } + + req.dst = msg.arg.pagefault.address; + req.src = (__u64)zero; + req.len = 4096; + req.mode = 0; + + if (ioctl(uffd, UFFDIO_COPY, &req)) + return (void *)1; + + return (void *)0; +} + +int test_userfaultfd(void) +{ + struct uffdio_register uffdio_register; + struct uffdio_api uffdio_api; + struct sigaction sa = {}; + pthread_t thread; + void *res; + int uffd; + + sa.sa_sigaction = test_access_fix_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd < 0) { + printf("[SKIP]\tUserfaultfd unavailable.\n"); + return 0; + } + + reset_test_shstk(0); + + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + if (ioctl(uffd, UFFDIO_API, &uffdio_api)) + goto err; + + uffdio_register.range.start = (__u64)shstk_ptr; + uffdio_register.range.len = 4096; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) + goto err; + + if (pthread_create(&thread, NULL, &uffd_thread, &uffd)) + goto err; + + reset_shstk(shstk_ptr); + test_shstk_access(shstk_ptr); + + if (pthread_join(thread, &res)) + goto err; + + if (test_shstk_access(shstk_ptr)) + goto err; + + free_shstk(shstk_ptr); + + signal(SIGSEGV, SIG_DFL); + + if (!res) + printf("[OK]\tUserfaultfd test\n"); + return !!res; +err: + free_shstk(shstk_ptr); + close(uffd); + signal(SIGSEGV, SIG_DFL); + return 1; +} + +/* Simple linked list for keeping track of mappings in test_guard_gap() */ +struct node { + struct node *next; + void *mapping; +}; + +/* + * This tests whether mmap will place other mappings in a shadow stack's guard + * gap. The steps are: + * 1. Finds an empty place by mapping and unmapping something. + * 2. Map a shadow stack in the middle of the known empty area. + * 3. Map a bunch of PAGE_SIZE mappings. These will use the search down + * direction, filling any gaps until it encounters the shadow stack's + * guard gap. + * 4. When a mapping lands below the shadow stack from step 2, then all + * of the above gaps are filled. The search down algorithm will have + * looked at the shadow stack gaps. + * 5. See if it landed in the gap. + */ +int test_guard_gap(void) +{ + void *free_area, *shstk, *test_map = (void *)0xFFFFFFFFFFFFFFFF; + struct node *head = NULL, *cur; + + free_area = mmap(0, SS_SIZE * 3, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + munmap(free_area, SS_SIZE * 3); + + shstk = create_shstk(free_area + SS_SIZE); + if (shstk == MAP_FAILED) + return 1; + + while (test_map > shstk) { + test_map = mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (test_map == MAP_FAILED) + return 1; + cur = malloc(sizeof(*cur)); + cur->mapping = test_map; + + cur->next = head; + head = cur; + } + + while (head) { + cur = head; + head = cur->next; + munmap(cur->mapping, PAGE_SIZE); + free(cur); + } + + free_shstk(shstk); + + if (shstk - test_map - PAGE_SIZE != PAGE_SIZE) + return 1; + + printf("[OK]\tGuard gap test\n"); + + return 0; +} + +/* + * Too complicated to pull it out of the 32 bit header, but also get the + * 64 bit one needed above. Just define a copy here. + */ +#define __NR_compat_sigaction 67 + +/* + * Call 32 bit signal handler to get 32 bit signals ABI. Make sure + * to push the registers that will get clobbered. + */ +int sigaction32(int signum, const struct sigaction *restrict act, + struct sigaction *restrict oldact) +{ + register long syscall_reg asm("eax") = __NR_compat_sigaction; + register long signum_reg asm("ebx") = signum; + register long act_reg asm("ecx") = (long)act; + register long oldact_reg asm("edx") = (long)oldact; + int ret = 0; + + asm volatile ("int $0x80;" + : "=a"(ret), "=m"(oldact) + : "r"(syscall_reg), "r"(signum_reg), "r"(act_reg), + "r"(oldact_reg) + : "r8", "r9", "r10", "r11" + ); + + return ret; +} + +sigjmp_buf jmp_buffer; + +void segv_gp_handler(int signum, siginfo_t *si, void *uc) +{ + segv_triggered = true; + + /* + * To work with old glibc, this can't rely on siglongjmp working with + * shadow stack enabled, so disable shadow stack before siglongjmp(). + */ + ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK); + siglongjmp(jmp_buffer, -1); +} + +/* + * Transition to 32 bit mode and check that a #GP triggers a segfault. + */ +int test_32bit(void) +{ + struct sigaction sa = {}; + struct sigaction *sa32; + + /* Create sigaction in 32 bit address range */ + sa32 = mmap(0, 4096, PROT_READ | PROT_WRITE, + MAP_32BIT | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + sa32->sa_flags = SA_SIGINFO; + + sa.sa_sigaction = segv_gp_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + + segv_triggered = false; + + /* Make sure segv_triggered is set before triggering the #GP */ + asm volatile("" : : : "memory"); + + /* + * Set handler to somewhere in 32 bit address space + */ + sa32->sa_handler = (void *)sa32; + if (sigaction32(SIGUSR1, sa32, NULL)) + return 1; + + if (!sigsetjmp(jmp_buffer, 1)) + raise(SIGUSR1); + + if (segv_triggered) + printf("[OK]\t32 bit test\n"); + + return !segv_triggered; +} + +void segv_handler_ptrace(int signum, siginfo_t *si, void *uc) +{ + /* The SSP adjustment caused a segfault. */ + exit(0); +} + +int test_ptrace(void) +{ + unsigned long saved_ssp, ssp = 0; + struct sigaction sa= {}; + struct iovec iov; + int status; + int pid; + + iov.iov_base = &ssp; + iov.iov_len = sizeof(ssp); + + pid = fork(); + if (!pid) { + ssp = get_ssp(); + + sa.sa_sigaction = segv_handler_ptrace; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + ptrace(PTRACE_TRACEME, NULL, NULL, NULL); + /* + * The parent will tweak the SSP and return from this function + * will #CP. + */ + raise(SIGTRAP); + + exit(1); + } + + while (waitpid(pid, &status, 0) != -1 && WSTOPSIG(status) != SIGTRAP); + + if (ptrace(PTRACE_GETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tFailed to PTRACE_GETREGS\n"); + goto out_kill; + } + + if (!ssp) { + printf("[INFO]\tPtrace child SSP was 0\n"); + goto out_kill; + } + + saved_ssp = ssp; + + iov.iov_len = 0; + if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tToo small size accepted via PTRACE_SETREGS\n"); + goto out_kill; + } + + iov.iov_len = sizeof(ssp) + 1; + if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tToo large size accepted via PTRACE_SETREGS\n"); + goto out_kill; + } + + ssp += 1; + if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tUnaligned SSP written via PTRACE_SETREGS\n"); + goto out_kill; + } + + ssp = 0xFFFFFFFFFFFF0000; + if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tKernel range SSP written via PTRACE_SETREGS\n"); + goto out_kill; + } + + /* + * Tweak the SSP so the child with #CP when it resumes and returns + * from raise() + */ + ssp = saved_ssp + 8; + iov.iov_len = sizeof(ssp); + if (ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tFailed to PTRACE_SETREGS\n"); + goto out_kill; + } + + if (ptrace(PTRACE_DETACH, pid, NULL, NULL)) { + printf("[INFO]\tFailed to PTRACE_DETACH\n"); + goto out_kill; + } + + waitpid(pid, &status, 0); + if (WEXITSTATUS(status)) + return 1; + + printf("[OK]\tPtrace test\n"); + return 0; + +out_kill: + kill(pid, SIGKILL); + return 1; +} + +int main(int argc, char *argv[]) +{ + int ret = 0; + + if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) { + printf("[SKIP]\tCould not enable Shadow stack\n"); + return 1; + } + + if (ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK)) { + ret = 1; + printf("[FAIL]\tDisabling shadow stack failed\n"); + } + + if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) { + printf("[SKIP]\tCould not re-enable Shadow stack\n"); + return 1; + } + + if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_WRSS)) { + printf("[SKIP]\tCould not enable WRSS\n"); + ret = 1; + goto out; + } + + /* Should have succeeded if here, but this is a test, so double check. */ + if (!get_ssp()) { + printf("[FAIL]\tShadow stack disabled\n"); + return 1; + } + + if (test_shstk_pivot()) { + ret = 1; + printf("[FAIL]\tShadow stack pivot\n"); + goto out; + } + + if (test_shstk_faults()) { + ret = 1; + printf("[FAIL]\tShadow stack fault test\n"); + goto out; + } + + if (test_shstk_violation()) { + ret = 1; + printf("[FAIL]\tShadow stack violation test\n"); + goto out; + } + + if (test_gup()) { + ret = 1; + printf("[FAIL]\tShadow shadow stack gup\n"); + goto out; + } + + if (test_mprotect()) { + ret = 1; + printf("[FAIL]\tShadow shadow mprotect test\n"); + goto out; + } + + if (test_userfaultfd()) { + ret = 1; + printf("[FAIL]\tUserfaultfd test\n"); + goto out; + } + + if (test_guard_gap()) { + ret = 1; + printf("[FAIL]\tGuard gap test\n"); + goto out; + } + + if (test_ptrace()) { + ret = 1; + printf("[FAIL]\tptrace test\n"); + } + + if (test_32bit()) { + ret = 1; + printf("[FAIL]\t32 bit test\n"); + goto out; + } + + return ret; + +out: + /* + * Disable shadow stack before the function returns, or there will be a + * shadow stack violation. + */ + if (ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK)) { + ret = 1; + printf("[FAIL]\tDisabling shadow stack failed\n"); + } + + return ret; +} +#endif diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c deleted file mode 100644 index 42052db0f870..000000000000 --- a/tools/testing/selftests/x86/test_vdso.c +++ /dev/null @@ -1,342 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ldt_gdt.c - Test cases for LDT and GDT access - * Copyright (c) 2011-2015 Andrew Lutomirski - */ - -#define _GNU_SOURCE - -#include <stdio.h> -#include <sys/time.h> -#include <time.h> -#include <stdlib.h> -#include <unistd.h> -#include <sys/syscall.h> -#include <dlfcn.h> -#include <string.h> -#include <errno.h> -#include <sched.h> -#include <stdbool.h> -#include <limits.h> - -#ifndef SYS_getcpu -# ifdef __x86_64__ -# define SYS_getcpu 309 -# else -# define SYS_getcpu 318 -# endif -#endif - -/* max length of lines in /proc/self/maps - anything longer is skipped here */ -#define MAPS_LINE_LEN 128 - -int nerrs = 0; - -typedef int (*vgettime_t)(clockid_t, struct timespec *); - -vgettime_t vdso_clock_gettime; - -typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz); - -vgtod_t vdso_gettimeofday; - -typedef long (*getcpu_t)(unsigned *, unsigned *, void *); - -getcpu_t vgetcpu; -getcpu_t vdso_getcpu; - -static void *vsyscall_getcpu(void) -{ -#ifdef __x86_64__ - FILE *maps; - char line[MAPS_LINE_LEN]; - bool found = false; - - maps = fopen("/proc/self/maps", "r"); - if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */ - return NULL; - - while (fgets(line, MAPS_LINE_LEN, maps)) { - char r, x; - void *start, *end; - char name[MAPS_LINE_LEN]; - - /* sscanf() is safe here as strlen(name) >= strlen(line) */ - if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s", - &start, &end, &r, &x, name) != 5) - continue; - - if (strcmp(name, "[vsyscall]")) - continue; - - /* assume entries are OK, as we test vDSO here not vsyscall */ - found = true; - break; - } - - fclose(maps); - - if (!found) { - printf("Warning: failed to find vsyscall getcpu\n"); - return NULL; - } - return (void *) (0xffffffffff600800); -#else - return NULL; -#endif -} - - -static void fill_function_pointers() -{ - void *vdso = dlopen("linux-vdso.so.1", - RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); - if (!vdso) - vdso = dlopen("linux-gate.so.1", - RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); - if (!vdso) { - printf("[WARN]\tfailed to find vDSO\n"); - return; - } - - vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); - if (!vdso_getcpu) - printf("Warning: failed to find getcpu in vDSO\n"); - - vgetcpu = (getcpu_t) vsyscall_getcpu(); - - vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); - if (!vdso_clock_gettime) - printf("Warning: failed to find clock_gettime in vDSO\n"); - - vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday"); - if (!vdso_gettimeofday) - printf("Warning: failed to find gettimeofday in vDSO\n"); - -} - -static long sys_getcpu(unsigned * cpu, unsigned * node, - void* cache) -{ - return syscall(__NR_getcpu, cpu, node, cache); -} - -static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) -{ - return syscall(__NR_clock_gettime, id, ts); -} - -static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz) -{ - return syscall(__NR_gettimeofday, tv, tz); -} - -static void test_getcpu(void) -{ - printf("[RUN]\tTesting getcpu...\n"); - - for (int cpu = 0; ; cpu++) { - cpu_set_t cpuset; - CPU_ZERO(&cpuset); - CPU_SET(cpu, &cpuset); - if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) - return; - - unsigned cpu_sys, cpu_vdso, cpu_vsys, - node_sys, node_vdso, node_vsys; - long ret_sys, ret_vdso = 1, ret_vsys = 1; - unsigned node; - - ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); - if (vdso_getcpu) - ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); - if (vgetcpu) - ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); - - if (!ret_sys) - node = node_sys; - else if (!ret_vdso) - node = node_vdso; - else if (!ret_vsys) - node = node_vsys; - - bool ok = true; - if (!ret_sys && (cpu_sys != cpu || node_sys != node)) - ok = false; - if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node)) - ok = false; - if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node)) - ok = false; - - printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu); - if (!ret_sys) - printf(" syscall: cpu %u, node %u", cpu_sys, node_sys); - if (!ret_vdso) - printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso); - if (!ret_vsys) - printf(" vsyscall: cpu %u, node %u", cpu_vsys, - node_vsys); - printf("\n"); - - if (!ok) - nerrs++; - } -} - -static bool ts_leq(const struct timespec *a, const struct timespec *b) -{ - if (a->tv_sec != b->tv_sec) - return a->tv_sec < b->tv_sec; - else - return a->tv_nsec <= b->tv_nsec; -} - -static bool tv_leq(const struct timeval *a, const struct timeval *b) -{ - if (a->tv_sec != b->tv_sec) - return a->tv_sec < b->tv_sec; - else - return a->tv_usec <= b->tv_usec; -} - -static char const * const clocknames[] = { - [0] = "CLOCK_REALTIME", - [1] = "CLOCK_MONOTONIC", - [2] = "CLOCK_PROCESS_CPUTIME_ID", - [3] = "CLOCK_THREAD_CPUTIME_ID", - [4] = "CLOCK_MONOTONIC_RAW", - [5] = "CLOCK_REALTIME_COARSE", - [6] = "CLOCK_MONOTONIC_COARSE", - [7] = "CLOCK_BOOTTIME", - [8] = "CLOCK_REALTIME_ALARM", - [9] = "CLOCK_BOOTTIME_ALARM", - [10] = "CLOCK_SGI_CYCLE", - [11] = "CLOCK_TAI", -}; - -static void test_one_clock_gettime(int clock, const char *name) -{ - struct timespec start, vdso, end; - int vdso_ret, end_ret; - - printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock); - - if (sys_clock_gettime(clock, &start) < 0) { - if (errno == EINVAL) { - vdso_ret = vdso_clock_gettime(clock, &vdso); - if (vdso_ret == -EINVAL) { - printf("[OK]\tNo such clock.\n"); - } else { - printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret); - nerrs++; - } - } else { - printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno); - } - return; - } - - vdso_ret = vdso_clock_gettime(clock, &vdso); - end_ret = sys_clock_gettime(clock, &end); - - if (vdso_ret != 0 || end_ret != 0) { - printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", - vdso_ret, errno); - nerrs++; - return; - } - - printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", - (unsigned long long)start.tv_sec, start.tv_nsec, - (unsigned long long)vdso.tv_sec, vdso.tv_nsec, - (unsigned long long)end.tv_sec, end.tv_nsec); - - if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) { - printf("[FAIL]\tTimes are out of sequence\n"); - nerrs++; - } -} - -static void test_clock_gettime(void) -{ - if (!vdso_clock_gettime) { - printf("[SKIP]\tNo vDSO, so skipping clock_gettime() tests\n"); - return; - } - - for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]); - clock++) { - test_one_clock_gettime(clock, clocknames[clock]); - } - - /* Also test some invalid clock ids */ - test_one_clock_gettime(-1, "invalid"); - test_one_clock_gettime(INT_MIN, "invalid"); - test_one_clock_gettime(INT_MAX, "invalid"); -} - -static void test_gettimeofday(void) -{ - struct timeval start, vdso, end; - struct timezone sys_tz, vdso_tz; - int vdso_ret, end_ret; - - if (!vdso_gettimeofday) - return; - - printf("[RUN]\tTesting gettimeofday...\n"); - - if (sys_gettimeofday(&start, &sys_tz) < 0) { - printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno); - nerrs++; - return; - } - - vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz); - end_ret = sys_gettimeofday(&end, NULL); - - if (vdso_ret != 0 || end_ret != 0) { - printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", - vdso_ret, errno); - nerrs++; - return; - } - - printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n", - (unsigned long long)start.tv_sec, start.tv_usec, - (unsigned long long)vdso.tv_sec, vdso.tv_usec, - (unsigned long long)end.tv_sec, end.tv_usec); - - if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) { - printf("[FAIL]\tTimes are out of sequence\n"); - nerrs++; - } - - if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest && - sys_tz.tz_dsttime == vdso_tz.tz_dsttime) { - printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n", - sys_tz.tz_minuteswest, sys_tz.tz_dsttime); - } else { - printf("[FAIL]\ttimezones do not match\n"); - nerrs++; - } - - /* And make sure that passing NULL for tz doesn't crash. */ - vdso_gettimeofday(&vdso, NULL); -} - -int main(int argc, char **argv) -{ - fill_function_pointers(); - - test_clock_gettime(); - test_gettimeofday(); - - /* - * Test getcpu() last so that, if something goes wrong setting affinity, - * we still run the other tests. - */ - test_getcpu(); - - return nerrs ? 1 : 0; -} diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index a4f4d4cf22c3..47cab972807c 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -20,6 +20,8 @@ #include <setjmp.h> #include <sys/uio.h> +#include "helpers.h" + #ifdef __x86_64__ # define VSYS(x) (x) #else @@ -90,11 +92,8 @@ static void init_vdso(void) printf("[WARN]\tfailed to find time in vDSO\n"); vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu"); - if (!vdso_getcpu) { - /* getcpu() was never wired up in the 32-bit vDSO. */ - printf("[%s]\tfailed to find getcpu in vDSO\n", - sizeof(long) == 8 ? "WARN" : "NOTE"); - } + if (!vdso_getcpu) + printf("[WARN]\tfailed to find getcpu in vDSO\n"); } static int init_vsys(void) @@ -460,6 +459,17 @@ static int test_vsys_x(void) return 0; } +/* + * Debuggers expect ptrace() to be able to peek at the vsyscall page. + * Use process_vm_readv() as a proxy for ptrace() to test this. We + * want it to work in the vsyscall=emulate case and to fail in the + * vsyscall=xonly case. + * + * It's worth noting that this ABI is a bit nutty. write(2) can't + * read from the vsyscall page on any kernel version or mode. The + * fact that ptrace() ever worked was a nice courtesy of old kernels, + * but the code to support it is fairly gross. + */ static int test_process_vm_readv(void) { #ifdef __x86_64__ @@ -475,17 +485,24 @@ static int test_process_vm_readv(void) remote.iov_len = 4096; ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0); if (ret != 4096) { - printf("[OK]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno); - return 0; + /* + * We expect process_vm_readv() to work if and only if the + * vsyscall page is readable. + */ + printf("[%s]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", vsyscall_map_r ? "FAIL" : "OK", ret, errno); + return vsyscall_map_r ? 1 : 0; } if (vsyscall_map_r) { - if (!memcmp(buf, (const void *)0xffffffffff600000, 4096)) { + if (!memcmp(buf, remote.iov_base, sizeof(buf))) { printf("[OK]\tIt worked and read correct data\n"); } else { printf("[FAIL]\tIt worked but returned incorrect data\n"); return 1; } + } else { + printf("[FAIL]\tprocess_rm_readv() succeeded, but it should have failed in this configuration\n"); + return 1; } #endif @@ -493,21 +510,8 @@ static int test_process_vm_readv(void) } #ifdef __x86_64__ -#define X86_EFLAGS_TF (1UL << 8) static volatile sig_atomic_t num_vsyscall_traps; -static unsigned long get_eflags(void) -{ - unsigned long eflags; - asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags)); - return eflags; -} - -static void set_eflags(unsigned long eflags) -{ - asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags"); -} - static void sigtrap(int sig, siginfo_t *info, void *ctx_void) { ucontext_t *ctx = (ucontext_t *)ctx_void; diff --git a/tools/testing/selftests/x86/thunks.S b/tools/testing/selftests/x86/thunks.S index 1bb5d62c16a4..a2d47d8344d4 100644 --- a/tools/testing/selftests/x86/thunks.S +++ b/tools/testing/selftests/x86/thunks.S @@ -57,3 +57,5 @@ call32_from_64: ret .size call32_from_64, .-call32_from_64 + +.section .note.GNU-stack,"",%progbits diff --git a/tools/testing/selftests/x86/thunks_32.S b/tools/testing/selftests/x86/thunks_32.S index a71d92da8f46..f3f56e681e9f 100644 --- a/tools/testing/selftests/x86/thunks_32.S +++ b/tools/testing/selftests/x86/thunks_32.S @@ -45,3 +45,5 @@ call64_from_32: ret .size call64_from_32, .-call64_from_32 + +.section .note.GNU-stack,"",%progbits diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c index 0075ccd65407..4c311e1af4c7 100644 --- a/tools/testing/selftests/x86/unwind_vdso.c +++ b/tools/testing/selftests/x86/unwind_vdso.c @@ -11,6 +11,8 @@ #include <features.h> #include <stdio.h> +#include "helpers.h" + #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16 int main() @@ -53,27 +55,6 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), err(1, "sigaction"); } -#ifdef __x86_64__ -# define WIDTH "q" -#else -# define WIDTH "l" -#endif - -static unsigned long get_eflags(void) -{ - unsigned long eflags; - asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags)); - return eflags; -} - -static void set_eflags(unsigned long eflags) -{ - asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH - : : "rm" (eflags) : "flags"); -} - -#define X86_EFLAGS_TF (1UL << 8) - static volatile sig_atomic_t nerrs; static unsigned long sysinfo; static bool got_sysinfo = false; |