diff options
Diffstat (limited to 'tools')
161 files changed, 6934 insertions, 3266 deletions
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 1f6d904c6481..798e60b5454b 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -434,6 +434,7 @@ #define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ #define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */ diff --git a/tools/arch/x86/include/asm/irq_vectors.h b/tools/arch/x86/include/asm/irq_vectors.h index 43dcb9284208..3a19904c2db6 100644 --- a/tools/arch/x86/include/asm/irq_vectors.h +++ b/tools/arch/x86/include/asm/irq_vectors.h @@ -35,13 +35,6 @@ */ #define FIRST_EXTERNAL_VECTOR 0x20 -/* - * Reserve the lowest usable vector (and hence lowest priority) 0x20 for - * triggering cleanup after irq migration. 0x21-0x2f will still be used - * for device interrupts. - */ -#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR - #define IA32_SYSCALL_VECTOR 0x80 /* diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py index 0dbbc67400fc..933c750b319b 100644 --- a/tools/cgroup/iocost_monitor.py +++ b/tools/cgroup/iocost_monitor.py @@ -100,6 +100,7 @@ class IocStat: self.period_at = ioc.period_at.value_() / 1_000_000 self.vperiod_at = ioc.period_at_vtime.value_() / VTIME_PER_SEC self.vrate_pct = ioc.vtime_base_rate.value_() * 100 / VTIME_PER_USEC + self.ivrate_pct = ioc.vtime_rate.counter.value_() * 100 / VTIME_PER_USEC self.busy_level = ioc.busy_level.value_() self.autop_idx = ioc.autop_idx.value_() self.user_cost_model = ioc.user_cost_model.value_() @@ -119,7 +120,9 @@ class IocStat: 'period_at' : self.period_at, 'period_vtime_at' : self.vperiod_at, 'busy_level' : self.busy_level, - 'vrate_pct' : self.vrate_pct, } + 'vrate_pct' : self.vrate_pct, + 'ivrate_pct' : self.ivrate_pct, + } def table_preamble_str(self): state = ('RUN' if self.running else 'IDLE') if self.enabled else 'OFF' @@ -127,7 +130,7 @@ class IocStat: f'per={self.period_ms}ms ' \ f'cur_per={self.period_at:.3f}:v{self.vperiod_at:.3f} ' \ f'busy={self.busy_level:+3} ' \ - f'vrate={self.vrate_pct:6.2f}% ' \ + f'vrate={self.vrate_pct:6.2f}%:{self.ivrate_pct:6.2f}% ' \ f'params={self.autop_name}' if self.user_cost_model or self.user_qos_params: output += f'({"C" if self.user_cost_model else ""}{"Q" if self.user_qos_params else ""})' @@ -135,7 +138,7 @@ class IocStat: def table_header_str(self): return f'{"":25} active {"weight":>9} {"hweight%":>13} {"inflt%":>6} ' \ - f'{"debt":>7} {"delay":>7} {"usage%"}' + f'{"usage%":>6} {"wait":>7} {"debt":>7} {"delay":>7}' class IocgStat: def __init__(self, iocg): @@ -161,6 +164,8 @@ class IocgStat: self.usage = (100 * iocg.usage_delta_us.value_() / ioc.period_us.value_()) if self.active else 0 + self.wait_ms = (iocg.stat.wait_us.value_() - + iocg.last_stat.wait_us.value_()) / 1000 self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000 if blkg.use_delay.counter.value_() != 0: self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000 @@ -177,9 +182,10 @@ class IocgStat: 'hweight_active_pct' : self.hwa_pct, 'hweight_inuse_pct' : self.hwi_pct, 'inflight_pct' : self.inflight_pct, + 'usage_pct' : self.usage, + 'wait_ms' : self.wait_ms, 'debt_ms' : self.debt_ms, 'delay_ms' : self.delay_ms, - 'usage_pct' : self.usage, 'address' : self.address } return out @@ -189,9 +195,10 @@ class IocgStat: f'{round(self.inuse):5}/{round(self.active):5} ' \ f'{self.hwi_pct:6.2f}/{self.hwa_pct:6.2f} ' \ f'{self.inflight_pct:6.2f} ' \ + f'{min(self.usage, 999):6.2f} ' \ + f'{self.wait_ms:7.2f} ' \ f'{self.debt_ms:7.2f} ' \ - f'{self.delay_ms:7.2f} '\ - f'{min(self.usage, 999):6.2f}' + f'{self.delay_ms:7.2f}' out = out.rstrip(':') return out @@ -221,7 +228,7 @@ ioc = None for i, ptr in radix_tree_for_each(blkcg_root.blkg_tree.address_of_()): blkg = drgn.Object(prog, 'struct blkcg_gq', address=ptr) try: - if devname == blkg.q.kobj.parent.name.string_().decode('utf-8'): + if devname == blkg.q.mq_kobj.parent.name.string_().decode('utf-8'): q_id = blkg.q.id.value_() if blkg.pd[plid]: root_iocg = container_of(blkg.pd[plid], 'struct ioc_gq', 'pd') diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c index f8deae4e26a1..44bbf80f0cfd 100644 --- a/tools/iio/iio_generic_buffer.c +++ b/tools/iio/iio_generic_buffer.c @@ -51,9 +51,9 @@ enum autochan { * Has the side effect of filling the channels[i].location values used * in processing the buffer output. **/ -static int size_from_channelarray(struct iio_channel_info *channels, int num_channels) +static unsigned int size_from_channelarray(struct iio_channel_info *channels, int num_channels) { - int bytes = 0; + unsigned int bytes = 0; int i = 0; while (i < num_channels) { @@ -348,7 +348,7 @@ int main(int argc, char **argv) ssize_t read_size; int dev_num = -1, trig_num = -1; char *buffer_access = NULL; - int scan_size; + unsigned int scan_size; int noevents = 0; int notrigger = 0; char *dummy; @@ -674,7 +674,16 @@ int main(int argc, char **argv) } scan_size = size_from_channelarray(channels, num_channels); - data = malloc(scan_size * buf_len); + + size_t total_buf_len = scan_size * buf_len; + + if (scan_size > 0 && total_buf_len / scan_size != buf_len) { + ret = -EFAULT; + perror("Integer overflow happened when calculate scan_size * buf_len"); + goto error; + } + + data = malloc(total_buf_len); if (!data) { ret = -ENOMEM; goto error; diff --git a/tools/io_uring/Makefile b/tools/io_uring/Makefile deleted file mode 100644 index 00f146c54c53..000000000000 --- a/tools/io_uring/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# Makefile for io_uring test tools -CFLAGS += -Wall -Wextra -g -D_GNU_SOURCE -LDLIBS += -lpthread - -all: io_uring-cp io_uring-bench -%: %.c - $(CC) $(CFLAGS) -o $@ $^ - -io_uring-bench: syscall.o io_uring-bench.o - $(CC) $(CFLAGS) -o $@ $^ $(LDLIBS) - -io_uring-cp: setup.o syscall.o queue.o - -clean: - $(RM) io_uring-cp io_uring-bench *.o - -.PHONY: all clean diff --git a/tools/io_uring/README b/tools/io_uring/README deleted file mode 100644 index 67fd70115cff..000000000000 --- a/tools/io_uring/README +++ /dev/null @@ -1,29 +0,0 @@ -This directory includes a few programs that demonstrate how to use io_uring -in an application. The examples are: - -io_uring-cp - A very basic io_uring implementation of cp(1). It takes two - arguments, copies the first argument to the second. This example - is part of liburing, and hence uses the simplified liburing API - for setting up an io_uring instance, submitting IO, completing IO, - etc. The support functions in queue.c and setup.c are straight - out of liburing. - -io_uring-bench - Benchmark program that does random reads on a number of files. This - app demonstrates the various features of io_uring, like fixed files, - fixed buffers, and polled IO. There are options in the program to - control which features to use. Arguments is the file (or files) that - io_uring-bench should operate on. This uses the raw io_uring - interface. - -liburing can be cloned with git here: - - git://git.kernel.dk/liburing - -and contains a number of unit tests as well for testing io_uring. It also -comes with man pages for the three system calls. - -Fio includes an io_uring engine, you can clone fio here: - - git://git.kernel.dk/fio diff --git a/tools/io_uring/barrier.h b/tools/io_uring/barrier.h deleted file mode 100644 index ef00f6722ba9..000000000000 --- a/tools/io_uring/barrier.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef LIBURING_BARRIER_H -#define LIBURING_BARRIER_H - -#if defined(__x86_64) || defined(__i386__) -#define read_barrier() __asm__ __volatile__("":::"memory") -#define write_barrier() __asm__ __volatile__("":::"memory") -#else -/* - * Add arch appropriate definitions. Be safe and use full barriers for - * archs we don't have support for. - */ -#define read_barrier() __sync_synchronize() -#define write_barrier() __sync_synchronize() -#endif - -#endif diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c deleted file mode 100644 index 7703f0118385..000000000000 --- a/tools/io_uring/io_uring-bench.c +++ /dev/null @@ -1,592 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Simple benchmark program that uses the various features of io_uring - * to provide fast random access to a device/file. It has various - * options that are control how we use io_uring, see the OPTIONS section - * below. This uses the raw io_uring interface. - * - * Copyright (C) 2018-2019 Jens Axboe - */ -#include <stdio.h> -#include <errno.h> -#include <assert.h> -#include <stdlib.h> -#include <stddef.h> -#include <signal.h> -#include <inttypes.h> - -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/ioctl.h> -#include <sys/syscall.h> -#include <sys/resource.h> -#include <sys/mman.h> -#include <sys/uio.h> -#include <linux/fs.h> -#include <fcntl.h> -#include <unistd.h> -#include <string.h> -#include <pthread.h> -#include <sched.h> - -#include "liburing.h" -#include "barrier.h" - -#define min(a, b) ((a < b) ? (a) : (b)) - -struct io_sq_ring { - unsigned *head; - unsigned *tail; - unsigned *ring_mask; - unsigned *ring_entries; - unsigned *flags; - unsigned *array; -}; - -struct io_cq_ring { - unsigned *head; - unsigned *tail; - unsigned *ring_mask; - unsigned *ring_entries; - struct io_uring_cqe *cqes; -}; - -#define DEPTH 128 - -#define BATCH_SUBMIT 32 -#define BATCH_COMPLETE 32 - -#define BS 4096 - -#define MAX_FDS 16 - -static unsigned sq_ring_mask, cq_ring_mask; - -struct file { - unsigned long max_blocks; - unsigned pending_ios; - int real_fd; - int fixed_fd; -}; - -struct submitter { - pthread_t thread; - int ring_fd; - struct drand48_data rand; - struct io_sq_ring sq_ring; - struct io_uring_sqe *sqes; - struct iovec iovecs[DEPTH]; - struct io_cq_ring cq_ring; - int inflight; - unsigned long reaps; - unsigned long done; - unsigned long calls; - volatile int finish; - - __s32 *fds; - - struct file files[MAX_FDS]; - unsigned nr_files; - unsigned cur_file; -}; - -static struct submitter submitters[1]; -static volatile int finish; - -/* - * OPTIONS: Set these to test the various features of io_uring. - */ -static int polled = 1; /* use IO polling */ -static int fixedbufs = 1; /* use fixed user buffers */ -static int register_files = 1; /* use fixed files */ -static int buffered = 0; /* use buffered IO, not O_DIRECT */ -static int sq_thread_poll = 0; /* use kernel submission/poller thread */ -static int sq_thread_cpu = -1; /* pin above thread to this CPU */ -static int do_nop = 0; /* no-op SQ ring commands */ - -static int io_uring_register_buffers(struct submitter *s) -{ - if (do_nop) - return 0; - - return io_uring_register(s->ring_fd, IORING_REGISTER_BUFFERS, s->iovecs, - DEPTH); -} - -static int io_uring_register_files(struct submitter *s) -{ - unsigned i; - - if (do_nop) - return 0; - - s->fds = calloc(s->nr_files, sizeof(__s32)); - for (i = 0; i < s->nr_files; i++) { - s->fds[i] = s->files[i].real_fd; - s->files[i].fixed_fd = i; - } - - return io_uring_register(s->ring_fd, IORING_REGISTER_FILES, s->fds, - s->nr_files); -} - -static int lk_gettid(void) -{ - return syscall(__NR_gettid); -} - -static unsigned file_depth(struct submitter *s) -{ - return (DEPTH + s->nr_files - 1) / s->nr_files; -} - -static void init_io(struct submitter *s, unsigned index) -{ - struct io_uring_sqe *sqe = &s->sqes[index]; - unsigned long offset; - struct file *f; - long r; - - if (do_nop) { - sqe->opcode = IORING_OP_NOP; - return; - } - - if (s->nr_files == 1) { - f = &s->files[0]; - } else { - f = &s->files[s->cur_file]; - if (f->pending_ios >= file_depth(s)) { - s->cur_file++; - if (s->cur_file == s->nr_files) - s->cur_file = 0; - f = &s->files[s->cur_file]; - } - } - f->pending_ios++; - - lrand48_r(&s->rand, &r); - offset = (r % (f->max_blocks - 1)) * BS; - - if (register_files) { - sqe->flags = IOSQE_FIXED_FILE; - sqe->fd = f->fixed_fd; - } else { - sqe->flags = 0; - sqe->fd = f->real_fd; - } - if (fixedbufs) { - sqe->opcode = IORING_OP_READ_FIXED; - sqe->addr = (unsigned long) s->iovecs[index].iov_base; - sqe->len = BS; - sqe->buf_index = index; - } else { - sqe->opcode = IORING_OP_READV; - sqe->addr = (unsigned long) &s->iovecs[index]; - sqe->len = 1; - sqe->buf_index = 0; - } - sqe->ioprio = 0; - sqe->off = offset; - sqe->user_data = (unsigned long) f; -} - -static int prep_more_ios(struct submitter *s, unsigned max_ios) -{ - struct io_sq_ring *ring = &s->sq_ring; - unsigned index, tail, next_tail, prepped = 0; - - next_tail = tail = *ring->tail; - do { - next_tail++; - read_barrier(); - if (next_tail == *ring->head) - break; - - index = tail & sq_ring_mask; - init_io(s, index); - ring->array[index] = index; - prepped++; - tail = next_tail; - } while (prepped < max_ios); - - if (*ring->tail != tail) { - /* order tail store with writes to sqes above */ - write_barrier(); - *ring->tail = tail; - write_barrier(); - } - return prepped; -} - -static int get_file_size(struct file *f) -{ - struct stat st; - - if (fstat(f->real_fd, &st) < 0) - return -1; - if (S_ISBLK(st.st_mode)) { - unsigned long long bytes; - - if (ioctl(f->real_fd, BLKGETSIZE64, &bytes) != 0) - return -1; - - f->max_blocks = bytes / BS; - return 0; - } else if (S_ISREG(st.st_mode)) { - f->max_blocks = st.st_size / BS; - return 0; - } - - return -1; -} - -static int reap_events(struct submitter *s) -{ - struct io_cq_ring *ring = &s->cq_ring; - struct io_uring_cqe *cqe; - unsigned head, reaped = 0; - - head = *ring->head; - do { - struct file *f; - - read_barrier(); - if (head == *ring->tail) - break; - cqe = &ring->cqes[head & cq_ring_mask]; - if (!do_nop) { - f = (struct file *) (uintptr_t) cqe->user_data; - f->pending_ios--; - if (cqe->res != BS) { - printf("io: unexpected ret=%d\n", cqe->res); - if (polled && cqe->res == -EOPNOTSUPP) - printf("Your filesystem doesn't support poll\n"); - return -1; - } - } - reaped++; - head++; - } while (1); - - s->inflight -= reaped; - *ring->head = head; - write_barrier(); - return reaped; -} - -static void *submitter_fn(void *data) -{ - struct submitter *s = data; - struct io_sq_ring *ring = &s->sq_ring; - int ret, prepped; - - printf("submitter=%d\n", lk_gettid()); - - srand48_r(pthread_self(), &s->rand); - - prepped = 0; - do { - int to_wait, to_submit, this_reap, to_prep; - - if (!prepped && s->inflight < DEPTH) { - to_prep = min(DEPTH - s->inflight, BATCH_SUBMIT); - prepped = prep_more_ios(s, to_prep); - } - s->inflight += prepped; -submit_more: - to_submit = prepped; -submit: - if (to_submit && (s->inflight + to_submit <= DEPTH)) - to_wait = 0; - else - to_wait = min(s->inflight + to_submit, BATCH_COMPLETE); - - /* - * Only need to call io_uring_enter if we're not using SQ thread - * poll, or if IORING_SQ_NEED_WAKEUP is set. - */ - if (!sq_thread_poll || (*ring->flags & IORING_SQ_NEED_WAKEUP)) { - unsigned flags = 0; - - if (to_wait) - flags = IORING_ENTER_GETEVENTS; - if ((*ring->flags & IORING_SQ_NEED_WAKEUP)) - flags |= IORING_ENTER_SQ_WAKEUP; - ret = io_uring_enter(s->ring_fd, to_submit, to_wait, - flags, NULL); - s->calls++; - } - - /* - * For non SQ thread poll, we already got the events we needed - * through the io_uring_enter() above. For SQ thread poll, we - * need to loop here until we find enough events. - */ - this_reap = 0; - do { - int r; - r = reap_events(s); - if (r == -1) { - s->finish = 1; - break; - } else if (r > 0) - this_reap += r; - } while (sq_thread_poll && this_reap < to_wait); - s->reaps += this_reap; - - if (ret >= 0) { - if (!ret) { - to_submit = 0; - if (s->inflight) - goto submit; - continue; - } else if (ret < to_submit) { - int diff = to_submit - ret; - - s->done += ret; - prepped -= diff; - goto submit_more; - } - s->done += ret; - prepped = 0; - continue; - } else if (ret < 0) { - if (errno == EAGAIN) { - if (s->finish) - break; - if (this_reap) - goto submit; - to_submit = 0; - goto submit; - } - printf("io_submit: %s\n", strerror(errno)); - break; - } - } while (!s->finish); - - finish = 1; - return NULL; -} - -static void sig_int(int sig) -{ - printf("Exiting on signal %d\n", sig); - submitters[0].finish = 1; - finish = 1; -} - -static void arm_sig_int(void) -{ - struct sigaction act; - - memset(&act, 0, sizeof(act)); - act.sa_handler = sig_int; - act.sa_flags = SA_RESTART; - sigaction(SIGINT, &act, NULL); -} - -static int setup_ring(struct submitter *s) -{ - struct io_sq_ring *sring = &s->sq_ring; - struct io_cq_ring *cring = &s->cq_ring; - struct io_uring_params p; - int ret, fd; - void *ptr; - - memset(&p, 0, sizeof(p)); - - if (polled && !do_nop) - p.flags |= IORING_SETUP_IOPOLL; - if (sq_thread_poll) { - p.flags |= IORING_SETUP_SQPOLL; - if (sq_thread_cpu != -1) { - p.flags |= IORING_SETUP_SQ_AFF; - p.sq_thread_cpu = sq_thread_cpu; - } - } - - fd = io_uring_setup(DEPTH, &p); - if (fd < 0) { - perror("io_uring_setup"); - return 1; - } - s->ring_fd = fd; - - if (fixedbufs) { - ret = io_uring_register_buffers(s); - if (ret < 0) { - perror("io_uring_register_buffers"); - return 1; - } - } - - if (register_files) { - ret = io_uring_register_files(s); - if (ret < 0) { - perror("io_uring_register_files"); - return 1; - } - } - - ptr = mmap(0, p.sq_off.array + p.sq_entries * sizeof(__u32), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, - IORING_OFF_SQ_RING); - printf("sq_ring ptr = 0x%p\n", ptr); - sring->head = ptr + p.sq_off.head; - sring->tail = ptr + p.sq_off.tail; - sring->ring_mask = ptr + p.sq_off.ring_mask; - sring->ring_entries = ptr + p.sq_off.ring_entries; - sring->flags = ptr + p.sq_off.flags; - sring->array = ptr + p.sq_off.array; - sq_ring_mask = *sring->ring_mask; - - s->sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, - IORING_OFF_SQES); - printf("sqes ptr = 0x%p\n", s->sqes); - - ptr = mmap(0, p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe), - PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, - IORING_OFF_CQ_RING); - printf("cq_ring ptr = 0x%p\n", ptr); - cring->head = ptr + p.cq_off.head; - cring->tail = ptr + p.cq_off.tail; - cring->ring_mask = ptr + p.cq_off.ring_mask; - cring->ring_entries = ptr + p.cq_off.ring_entries; - cring->cqes = ptr + p.cq_off.cqes; - cq_ring_mask = *cring->ring_mask; - return 0; -} - -static void file_depths(char *buf) -{ - struct submitter *s = &submitters[0]; - unsigned i; - char *p; - - buf[0] = '\0'; - p = buf; - for (i = 0; i < s->nr_files; i++) { - struct file *f = &s->files[i]; - - if (i + 1 == s->nr_files) - p += sprintf(p, "%d", f->pending_ios); - else - p += sprintf(p, "%d, ", f->pending_ios); - } -} - -int main(int argc, char *argv[]) -{ - struct submitter *s = &submitters[0]; - unsigned long done, calls, reap; - int err, i, flags, fd; - char *fdepths; - void *ret; - - if (!do_nop && argc < 2) { - printf("%s: filename\n", argv[0]); - return 1; - } - - flags = O_RDONLY | O_NOATIME; - if (!buffered) - flags |= O_DIRECT; - - i = 1; - while (!do_nop && i < argc) { - struct file *f; - - if (s->nr_files == MAX_FDS) { - printf("Max number of files (%d) reached\n", MAX_FDS); - break; - } - fd = open(argv[i], flags); - if (fd < 0) { - perror("open"); - return 1; - } - - f = &s->files[s->nr_files]; - f->real_fd = fd; - if (get_file_size(f)) { - printf("failed getting size of device/file\n"); - return 1; - } - if (f->max_blocks <= 1) { - printf("Zero file/device size?\n"); - return 1; - } - f->max_blocks--; - - printf("Added file %s\n", argv[i]); - s->nr_files++; - i++; - } - - if (fixedbufs) { - struct rlimit rlim; - - rlim.rlim_cur = RLIM_INFINITY; - rlim.rlim_max = RLIM_INFINITY; - if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) { - perror("setrlimit"); - return 1; - } - } - - arm_sig_int(); - - for (i = 0; i < DEPTH; i++) { - void *buf; - - if (posix_memalign(&buf, BS, BS)) { - printf("failed alloc\n"); - return 1; - } - s->iovecs[i].iov_base = buf; - s->iovecs[i].iov_len = BS; - } - - err = setup_ring(s); - if (err) { - printf("ring setup failed: %s, %d\n", strerror(errno), err); - return 1; - } - printf("polled=%d, fixedbufs=%d, buffered=%d", polled, fixedbufs, buffered); - printf(" QD=%d, sq_ring=%d, cq_ring=%d\n", DEPTH, *s->sq_ring.ring_entries, *s->cq_ring.ring_entries); - - pthread_create(&s->thread, NULL, submitter_fn, s); - - fdepths = malloc(8 * s->nr_files); - reap = calls = done = 0; - do { - unsigned long this_done = 0; - unsigned long this_reap = 0; - unsigned long this_call = 0; - unsigned long rpc = 0, ipc = 0; - - sleep(1); - this_done += s->done; - this_call += s->calls; - this_reap += s->reaps; - if (this_call - calls) { - rpc = (this_done - done) / (this_call - calls); - ipc = (this_reap - reap) / (this_call - calls); - } else - rpc = ipc = -1; - file_depths(fdepths); - printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s)\n", - this_done - done, rpc, ipc, s->inflight, - fdepths); - done = this_done; - calls = this_call; - reap = this_reap; - } while (!finish); - - pthread_join(s->thread, &ret); - close(s->ring_fd); - free(fdepths); - return 0; -} diff --git a/tools/io_uring/io_uring-cp.c b/tools/io_uring/io_uring-cp.c deleted file mode 100644 index d9bd6f5f8f46..000000000000 --- a/tools/io_uring/io_uring-cp.c +++ /dev/null @@ -1,283 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Simple test program that demonstrates a file copy through io_uring. This - * uses the API exposed by liburing. - * - * Copyright (C) 2018-2019 Jens Axboe - */ -#include <stdio.h> -#include <fcntl.h> -#include <string.h> -#include <stdlib.h> -#include <unistd.h> -#include <assert.h> -#include <errno.h> -#include <inttypes.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/ioctl.h> - -#include "liburing.h" - -#define QD 64 -#define BS (32*1024) - -static int infd, outfd; - -struct io_data { - int read; - off_t first_offset, offset; - size_t first_len; - struct iovec iov; -}; - -static int setup_context(unsigned entries, struct io_uring *ring) -{ - int ret; - - ret = io_uring_queue_init(entries, ring, 0); - if (ret < 0) { - fprintf(stderr, "queue_init: %s\n", strerror(-ret)); - return -1; - } - - return 0; -} - -static int get_file_size(int fd, off_t *size) -{ - struct stat st; - - if (fstat(fd, &st) < 0) - return -1; - if (S_ISREG(st.st_mode)) { - *size = st.st_size; - return 0; - } else if (S_ISBLK(st.st_mode)) { - unsigned long long bytes; - - if (ioctl(fd, BLKGETSIZE64, &bytes) != 0) - return -1; - - *size = bytes; - return 0; - } - - return -1; -} - -static void queue_prepped(struct io_uring *ring, struct io_data *data) -{ - struct io_uring_sqe *sqe; - - sqe = io_uring_get_sqe(ring); - assert(sqe); - - if (data->read) - io_uring_prep_readv(sqe, infd, &data->iov, 1, data->offset); - else - io_uring_prep_writev(sqe, outfd, &data->iov, 1, data->offset); - - io_uring_sqe_set_data(sqe, data); -} - -static int queue_read(struct io_uring *ring, off_t size, off_t offset) -{ - struct io_uring_sqe *sqe; - struct io_data *data; - - data = malloc(size + sizeof(*data)); - if (!data) - return 1; - - sqe = io_uring_get_sqe(ring); - if (!sqe) { - free(data); - return 1; - } - - data->read = 1; - data->offset = data->first_offset = offset; - - data->iov.iov_base = data + 1; - data->iov.iov_len = size; - data->first_len = size; - - io_uring_prep_readv(sqe, infd, &data->iov, 1, offset); - io_uring_sqe_set_data(sqe, data); - return 0; -} - -static void queue_write(struct io_uring *ring, struct io_data *data) -{ - data->read = 0; - data->offset = data->first_offset; - - data->iov.iov_base = data + 1; - data->iov.iov_len = data->first_len; - - queue_prepped(ring, data); - io_uring_submit(ring); -} - -static int copy_file(struct io_uring *ring, off_t insize) -{ - unsigned long reads, writes; - struct io_uring_cqe *cqe; - off_t write_left, offset; - int ret; - - write_left = insize; - writes = reads = offset = 0; - - while (insize || write_left) { - int had_reads, got_comp; - - /* - * Queue up as many reads as we can - */ - had_reads = reads; - while (insize) { - off_t this_size = insize; - - if (reads + writes >= QD) - break; - if (this_size > BS) - this_size = BS; - else if (!this_size) - break; - - if (queue_read(ring, this_size, offset)) - break; - - insize -= this_size; - offset += this_size; - reads++; - } - - if (had_reads != reads) { - ret = io_uring_submit(ring); - if (ret < 0) { - fprintf(stderr, "io_uring_submit: %s\n", strerror(-ret)); - break; - } - } - - /* - * Queue is full at this point. Find at least one completion. - */ - got_comp = 0; - while (write_left) { - struct io_data *data; - - if (!got_comp) { - ret = io_uring_wait_cqe(ring, &cqe); - got_comp = 1; - } else { - ret = io_uring_peek_cqe(ring, &cqe); - if (ret == -EAGAIN) { - cqe = NULL; - ret = 0; - } - } - if (ret < 0) { - fprintf(stderr, "io_uring_peek_cqe: %s\n", - strerror(-ret)); - return 1; - } - if (!cqe) - break; - - data = io_uring_cqe_get_data(cqe); - if (cqe->res < 0) { - if (cqe->res == -EAGAIN) { - queue_prepped(ring, data); - io_uring_cqe_seen(ring, cqe); - continue; - } - fprintf(stderr, "cqe failed: %s\n", - strerror(-cqe->res)); - return 1; - } else if (cqe->res != data->iov.iov_len) { - /* Short read/write, adjust and requeue */ - data->iov.iov_base += cqe->res; - data->iov.iov_len -= cqe->res; - data->offset += cqe->res; - queue_prepped(ring, data); - io_uring_cqe_seen(ring, cqe); - continue; - } - - /* - * All done. if write, nothing else to do. if read, - * queue up corresponding write. - */ - if (data->read) { - queue_write(ring, data); - write_left -= data->first_len; - reads--; - writes++; - } else { - free(data); - writes--; - } - io_uring_cqe_seen(ring, cqe); - } - } - - /* wait out pending writes */ - while (writes) { - struct io_data *data; - - ret = io_uring_wait_cqe(ring, &cqe); - if (ret) { - fprintf(stderr, "wait_cqe=%d\n", ret); - return 1; - } - if (cqe->res < 0) { - fprintf(stderr, "write res=%d\n", cqe->res); - return 1; - } - data = io_uring_cqe_get_data(cqe); - free(data); - writes--; - io_uring_cqe_seen(ring, cqe); - } - - return 0; -} - -int main(int argc, char *argv[]) -{ - struct io_uring ring; - off_t insize; - int ret; - - if (argc < 3) { - printf("%s: infile outfile\n", argv[0]); - return 1; - } - - infd = open(argv[1], O_RDONLY); - if (infd < 0) { - perror("open infile"); - return 1; - } - outfd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644); - if (outfd < 0) { - perror("open outfile"); - return 1; - } - - if (setup_context(QD, &ring)) - return 1; - if (get_file_size(infd, &insize)) - return 1; - - ret = copy_file(&ring, insize); - - close(infd); - close(outfd); - io_uring_queue_exit(&ring); - return ret; -} diff --git a/tools/io_uring/liburing.h b/tools/io_uring/liburing.h deleted file mode 100644 index 28a837b6069d..000000000000 --- a/tools/io_uring/liburing.h +++ /dev/null @@ -1,187 +0,0 @@ -#ifndef LIB_URING_H -#define LIB_URING_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/uio.h> -#include <signal.h> -#include <string.h> -#include "../../include/uapi/linux/io_uring.h" -#include <inttypes.h> -#include <linux/swab.h> -#include "barrier.h" - -/* - * Library interface to io_uring - */ -struct io_uring_sq { - unsigned *khead; - unsigned *ktail; - unsigned *kring_mask; - unsigned *kring_entries; - unsigned *kflags; - unsigned *kdropped; - unsigned *array; - struct io_uring_sqe *sqes; - - unsigned sqe_head; - unsigned sqe_tail; - - size_t ring_sz; -}; - -struct io_uring_cq { - unsigned *khead; - unsigned *ktail; - unsigned *kring_mask; - unsigned *kring_entries; - unsigned *koverflow; - struct io_uring_cqe *cqes; - - size_t ring_sz; -}; - -struct io_uring { - struct io_uring_sq sq; - struct io_uring_cq cq; - int ring_fd; -}; - -/* - * System calls - */ -extern int io_uring_setup(unsigned entries, struct io_uring_params *p); -extern int io_uring_enter(int fd, unsigned to_submit, - unsigned min_complete, unsigned flags, sigset_t *sig); -extern int io_uring_register(int fd, unsigned int opcode, void *arg, - unsigned int nr_args); - -/* - * Library interface - */ -extern int io_uring_queue_init(unsigned entries, struct io_uring *ring, - unsigned flags); -extern int io_uring_queue_mmap(int fd, struct io_uring_params *p, - struct io_uring *ring); -extern void io_uring_queue_exit(struct io_uring *ring); -extern int io_uring_peek_cqe(struct io_uring *ring, - struct io_uring_cqe **cqe_ptr); -extern int io_uring_wait_cqe(struct io_uring *ring, - struct io_uring_cqe **cqe_ptr); -extern int io_uring_submit(struct io_uring *ring); -extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring); - -/* - * Must be called after io_uring_{peek,wait}_cqe() after the cqe has - * been processed by the application. - */ -static inline void io_uring_cqe_seen(struct io_uring *ring, - struct io_uring_cqe *cqe) -{ - if (cqe) { - struct io_uring_cq *cq = &ring->cq; - - (*cq->khead)++; - /* - * Ensure that the kernel sees our new head, the kernel has - * the matching read barrier. - */ - write_barrier(); - } -} - -/* - * Command prep helpers - */ -static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data) -{ - sqe->user_data = (unsigned long) data; -} - -static inline void *io_uring_cqe_get_data(struct io_uring_cqe *cqe) -{ - return (void *) (uintptr_t) cqe->user_data; -} - -static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd, - const void *addr, unsigned len, - off_t offset) -{ - memset(sqe, 0, sizeof(*sqe)); - sqe->opcode = op; - sqe->fd = fd; - sqe->off = offset; - sqe->addr = (unsigned long) addr; - sqe->len = len; -} - -static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd, - const struct iovec *iovecs, - unsigned nr_vecs, off_t offset) -{ - io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset); -} - -static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd, - void *buf, unsigned nbytes, - off_t offset) -{ - io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset); -} - -static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd, - const struct iovec *iovecs, - unsigned nr_vecs, off_t offset) -{ - io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset); -} - -static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd, - const void *buf, unsigned nbytes, - off_t offset) -{ - io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset); -} - -static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd, - unsigned poll_mask) -{ - memset(sqe, 0, sizeof(*sqe)); - sqe->opcode = IORING_OP_POLL_ADD; - sqe->fd = fd; -#if __BYTE_ORDER == __BIG_ENDIAN - poll_mask = __swahw32(poll_mask); -#endif - sqe->poll_events = poll_mask; -} - -static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe, - void *user_data) -{ - memset(sqe, 0, sizeof(*sqe)); - sqe->opcode = IORING_OP_POLL_REMOVE; - sqe->addr = (unsigned long) user_data; -} - -static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd, - unsigned fsync_flags) -{ - memset(sqe, 0, sizeof(*sqe)); - sqe->opcode = IORING_OP_FSYNC; - sqe->fd = fd; - sqe->fsync_flags = fsync_flags; -} - -static inline void io_uring_prep_nop(struct io_uring_sqe *sqe) -{ - memset(sqe, 0, sizeof(*sqe)); - sqe->opcode = IORING_OP_NOP; -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/tools/io_uring/queue.c b/tools/io_uring/queue.c deleted file mode 100644 index 321819c132c7..000000000000 --- a/tools/io_uring/queue.c +++ /dev/null @@ -1,156 +0,0 @@ -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/mman.h> -#include <unistd.h> -#include <errno.h> -#include <string.h> - -#include "liburing.h" -#include "barrier.h" - -static int __io_uring_get_cqe(struct io_uring *ring, - struct io_uring_cqe **cqe_ptr, int wait) -{ - struct io_uring_cq *cq = &ring->cq; - const unsigned mask = *cq->kring_mask; - unsigned head; - int ret; - - *cqe_ptr = NULL; - head = *cq->khead; - do { - /* - * It's necessary to use a read_barrier() before reading - * the CQ tail, since the kernel updates it locklessly. The - * kernel has the matching store barrier for the update. The - * kernel also ensures that previous stores to CQEs are ordered - * with the tail update. - */ - read_barrier(); - if (head != *cq->ktail) { - *cqe_ptr = &cq->cqes[head & mask]; - break; - } - if (!wait) - break; - ret = io_uring_enter(ring->ring_fd, 0, 1, - IORING_ENTER_GETEVENTS, NULL); - if (ret < 0) - return -errno; - } while (1); - - return 0; -} - -/* - * Return an IO completion, if one is readily available. Returns 0 with - * cqe_ptr filled in on success, -errno on failure. - */ -int io_uring_peek_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr) -{ - return __io_uring_get_cqe(ring, cqe_ptr, 0); -} - -/* - * Return an IO completion, waiting for it if necessary. Returns 0 with - * cqe_ptr filled in on success, -errno on failure. - */ -int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr) -{ - return __io_uring_get_cqe(ring, cqe_ptr, 1); -} - -/* - * Submit sqes acquired from io_uring_get_sqe() to the kernel. - * - * Returns number of sqes submitted - */ -int io_uring_submit(struct io_uring *ring) -{ - struct io_uring_sq *sq = &ring->sq; - const unsigned mask = *sq->kring_mask; - unsigned ktail, ktail_next, submitted, to_submit; - int ret; - - /* - * If we have pending IO in the kring, submit it first. We need a - * read barrier here to match the kernels store barrier when updating - * the SQ head. - */ - read_barrier(); - if (*sq->khead != *sq->ktail) { - submitted = *sq->kring_entries; - goto submit; - } - - if (sq->sqe_head == sq->sqe_tail) - return 0; - - /* - * Fill in sqes that we have queued up, adding them to the kernel ring - */ - submitted = 0; - ktail = ktail_next = *sq->ktail; - to_submit = sq->sqe_tail - sq->sqe_head; - while (to_submit--) { - ktail_next++; - read_barrier(); - - sq->array[ktail & mask] = sq->sqe_head & mask; - ktail = ktail_next; - - sq->sqe_head++; - submitted++; - } - - if (!submitted) - return 0; - - if (*sq->ktail != ktail) { - /* - * First write barrier ensures that the SQE stores are updated - * with the tail update. This is needed so that the kernel - * will never see a tail update without the preceeding sQE - * stores being done. - */ - write_barrier(); - *sq->ktail = ktail; - /* - * The kernel has the matching read barrier for reading the - * SQ tail. - */ - write_barrier(); - } - -submit: - ret = io_uring_enter(ring->ring_fd, submitted, 0, - IORING_ENTER_GETEVENTS, NULL); - if (ret < 0) - return -errno; - - return ret; -} - -/* - * Return an sqe to fill. Application must later call io_uring_submit() - * when it's ready to tell the kernel about it. The caller may call this - * function multiple times before calling io_uring_submit(). - * - * Returns a vacant sqe, or NULL if we're full. - */ -struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) -{ - struct io_uring_sq *sq = &ring->sq; - unsigned next = sq->sqe_tail + 1; - struct io_uring_sqe *sqe; - - /* - * All sqes are used - */ - if (next - sq->sqe_head > *sq->kring_entries) - return NULL; - - sqe = &sq->sqes[sq->sqe_tail & *sq->kring_mask]; - sq->sqe_tail = next; - return sqe; -} diff --git a/tools/io_uring/setup.c b/tools/io_uring/setup.c deleted file mode 100644 index 0b50fcd78520..000000000000 --- a/tools/io_uring/setup.c +++ /dev/null @@ -1,107 +0,0 @@ -#include <sys/types.h> -#include <sys/stat.h> -#include <sys/mman.h> -#include <unistd.h> -#include <errno.h> -#include <string.h> - -#include "liburing.h" - -static int io_uring_mmap(int fd, struct io_uring_params *p, - struct io_uring_sq *sq, struct io_uring_cq *cq) -{ - size_t size; - void *ptr; - int ret; - - sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned); - ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING); - if (ptr == MAP_FAILED) - return -errno; - sq->khead = ptr + p->sq_off.head; - sq->ktail = ptr + p->sq_off.tail; - sq->kring_mask = ptr + p->sq_off.ring_mask; - sq->kring_entries = ptr + p->sq_off.ring_entries; - sq->kflags = ptr + p->sq_off.flags; - sq->kdropped = ptr + p->sq_off.dropped; - sq->array = ptr + p->sq_off.array; - - size = p->sq_entries * sizeof(struct io_uring_sqe); - sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, fd, - IORING_OFF_SQES); - if (sq->sqes == MAP_FAILED) { - ret = -errno; -err: - munmap(sq->khead, sq->ring_sz); - return ret; - } - - cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe); - ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING); - if (ptr == MAP_FAILED) { - ret = -errno; - munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe)); - goto err; - } - cq->khead = ptr + p->cq_off.head; - cq->ktail = ptr + p->cq_off.tail; - cq->kring_mask = ptr + p->cq_off.ring_mask; - cq->kring_entries = ptr + p->cq_off.ring_entries; - cq->koverflow = ptr + p->cq_off.overflow; - cq->cqes = ptr + p->cq_off.cqes; - return 0; -} - -/* - * For users that want to specify sq_thread_cpu or sq_thread_idle, this - * interface is a convenient helper for mmap()ing the rings. - * Returns -1 on error, or zero on success. On success, 'ring' - * contains the necessary information to read/write to the rings. - */ -int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring) -{ - int ret; - - memset(ring, 0, sizeof(*ring)); - ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq); - if (!ret) - ring->ring_fd = fd; - return ret; -} - -/* - * Returns -1 on error, or zero on success. On success, 'ring' - * contains the necessary information to read/write to the rings. - */ -int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags) -{ - struct io_uring_params p; - int fd, ret; - - memset(&p, 0, sizeof(p)); - p.flags = flags; - - fd = io_uring_setup(entries, &p); - if (fd < 0) - return fd; - - ret = io_uring_queue_mmap(fd, &p, ring); - if (ret) - close(fd); - - return ret; -} - -void io_uring_queue_exit(struct io_uring *ring) -{ - struct io_uring_sq *sq = &ring->sq; - struct io_uring_cq *cq = &ring->cq; - - munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe)); - munmap(sq->khead, sq->ring_sz); - munmap(cq->khead, cq->ring_sz); - close(ring->ring_fd); -} diff --git a/tools/io_uring/syscall.c b/tools/io_uring/syscall.c deleted file mode 100644 index b22e0aa54e9d..000000000000 --- a/tools/io_uring/syscall.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Will go away once libc support is there - */ -#include <unistd.h> -#include <sys/syscall.h> -#include <sys/uio.h> -#include <signal.h> -#include "liburing.h" - -#ifdef __alpha__ -/* - * alpha is the only exception, all other architectures - * have common numbers for new system calls. - */ -# ifndef __NR_io_uring_setup -# define __NR_io_uring_setup 535 -# endif -# ifndef __NR_io_uring_enter -# define __NR_io_uring_enter 536 -# endif -# ifndef __NR_io_uring_register -# define __NR_io_uring_register 537 -# endif -#else /* !__alpha__ */ -# ifndef __NR_io_uring_setup -# define __NR_io_uring_setup 425 -# endif -# ifndef __NR_io_uring_enter -# define __NR_io_uring_enter 426 -# endif -# ifndef __NR_io_uring_register -# define __NR_io_uring_register 427 -# endif -#endif - -int io_uring_register(int fd, unsigned int opcode, void *arg, - unsigned int nr_args) -{ - return syscall(__NR_io_uring_register, fd, opcode, arg, nr_args); -} - -int io_uring_setup(unsigned int entries, struct io_uring_params *p) -{ - return syscall(__NR_io_uring_setup, entries, p); -} - -int io_uring_enter(int fd, unsigned int to_submit, unsigned int min_complete, - unsigned int flags, sigset_t *sig) -{ - return syscall(__NR_io_uring_enter, fd, to_submit, min_complete, - flags, sig, _NSIG / 8); -} diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index 0d1634cedf44..2184814b37dd 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -392,6 +392,72 @@ static u64 read_perf_counter(unsigned int counter) static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } +/* __riscv_xlen contains the witdh of the native base integer, here 64-bit */ +#elif defined(__riscv) && __riscv_xlen == 64 + +/* TODO: implement rv32 support */ + +#define CSR_CYCLE 0xc00 +#define CSR_TIME 0xc01 + +#define csr_read(csr) \ +({ \ + register unsigned long __v; \ + __asm__ __volatile__ ("csrr %0, %1" \ + : "=r" (__v) \ + : "i" (csr) : ); \ + __v; \ +}) + +static unsigned long csr_read_num(int csr_num) +{ +#define switchcase_csr_read(__csr_num, __val) {\ + case __csr_num: \ + __val = csr_read(__csr_num); \ + break; } +#define switchcase_csr_read_2(__csr_num, __val) {\ + switchcase_csr_read(__csr_num + 0, __val) \ + switchcase_csr_read(__csr_num + 1, __val)} +#define switchcase_csr_read_4(__csr_num, __val) {\ + switchcase_csr_read_2(__csr_num + 0, __val) \ + switchcase_csr_read_2(__csr_num + 2, __val)} +#define switchcase_csr_read_8(__csr_num, __val) {\ + switchcase_csr_read_4(__csr_num + 0, __val) \ + switchcase_csr_read_4(__csr_num + 4, __val)} +#define switchcase_csr_read_16(__csr_num, __val) {\ + switchcase_csr_read_8(__csr_num + 0, __val) \ + switchcase_csr_read_8(__csr_num + 8, __val)} +#define switchcase_csr_read_32(__csr_num, __val) {\ + switchcase_csr_read_16(__csr_num + 0, __val) \ + switchcase_csr_read_16(__csr_num + 16, __val)} + + unsigned long ret = 0; + + switch (csr_num) { + switchcase_csr_read_32(CSR_CYCLE, ret) + default: + break; + } + + return ret; +#undef switchcase_csr_read_32 +#undef switchcase_csr_read_16 +#undef switchcase_csr_read_8 +#undef switchcase_csr_read_4 +#undef switchcase_csr_read_2 +#undef switchcase_csr_read +} + +static u64 read_perf_counter(unsigned int counter) +{ + return csr_read_num(CSR_CYCLE + counter); +} + +static u64 read_timestamp(void) +{ + return csr_read_num(CSR_TIME); +} + #else static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } static u64 read_timestamp(void) { return 0; } diff --git a/tools/mm/Makefile b/tools/mm/Makefile index 6c1da51f4177..1c5606cc3334 100644 --- a/tools/mm/Makefile +++ b/tools/mm/Makefile @@ -8,8 +8,8 @@ TARGETS=page-types slabinfo page_owner_sort LIB_DIR = ../lib/api LIBS = $(LIB_DIR)/libapi.a -CFLAGS += -Wall -Wextra -I../lib/ -LDFLAGS += $(LIBS) +CFLAGS += -Wall -Wextra -I../lib/ -pthread +LDFLAGS += $(LIBS) -pthread all: $(TARGETS) diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index e68ca6229756..886a13a77a16 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -284,7 +284,8 @@ static struct test_case tests__basic_mmap[] = { "permissions"), TEST_CASE_REASON("User space counter reading of instructions", mmap_user_read_instr, -#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || \ + (defined(__riscv) && __riscv_xlen == 64) "permissions" #else "unsupported" @@ -292,7 +293,8 @@ static struct test_case tests__basic_mmap[] = { ), TEST_CASE_REASON("User space counter reading of cycles", mmap_user_read_cycles, -#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || \ + (defined(__riscv) && __riscv_xlen == 64) "permissions" #else "unsupported" diff --git a/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh b/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh index eed9ce0fcbe6..87dc68c7de0c 100755 --- a/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh +++ b/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh @@ -12,7 +12,7 @@ x86_irq_vectors=${arch_x86_header_dir}/irq_vectors.h # FIRST_EXTERNAL_VECTOR is not that useful, find what is its number # and then replace whatever is using it and that is useful, which at -# the time of writing of this script was: IRQ_MOVE_CLEANUP_VECTOR. +# the time of writing of this script was: 0x20. first_external_regex='^#define[[:space:]]+FIRST_EXTERNAL_VECTOR[[:space:]]+(0x[[:xdigit:]]+)$' first_external_vector=$(grep -E ${first_external_regex} ${x86_irq_vectors} | sed -r "s/${first_external_regex}/\1/g") diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index dc531805a570..b53753dee02f 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -57,7 +57,7 @@ LIB_MIN= 1 PACKAGE = cpupower PACKAGE_BUGREPORT = linux-pm@vger.kernel.org -LANGUAGES = de fr it cs pt +LANGUAGES = de fr it cs pt ka # Directory definitions. These are default and most probably diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c index a73346e854b8..5fcc2a92957e 100644 --- a/tools/power/x86/intel-speed-select/isst-config.c +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -5,6 +5,7 @@ */ #include <linux/isst_if.h> +#include <sys/utsname.h> #include "isst.h" @@ -15,7 +16,7 @@ struct process_cmd_struct { int arg; }; -static const char *version_str = "v1.16"; +static const char *version_str = "v1.17"; static const int supported_api_ver = 2; static struct isst_if_platform_info isst_platform_info; @@ -473,11 +474,44 @@ static unsigned int is_cpu_online(int cpu) return online; } +static int get_kernel_version(int *major, int *minor) +{ + struct utsname buf; + int ret; + + ret = uname(&buf); + if (ret) + return ret; + + ret = sscanf(buf.release, "%d.%d", major, minor); + if (ret != 2) + return ret; + + return 0; +} + +#define CPU0_HOTPLUG_DEPRECATE_MAJOR_VER 6 +#define CPU0_HOTPLUG_DEPRECATE_MINOR_VER 5 + void set_cpu_online_offline(int cpu, int state) { char buffer[128]; int fd, ret; + if (!cpu) { + int major, minor; + + ret = get_kernel_version(&major, &minor); + if (!ret) { + if (major > CPU0_HOTPLUG_DEPRECATE_MAJOR_VER || (major == CPU0_HOTPLUG_DEPRECATE_MAJOR_VER && + minor >= CPU0_HOTPLUG_DEPRECATE_MINOR_VER)) { + debug_printf("Ignore CPU 0 offline/online for kernel version >= %d.%d\n", major, minor); + debug_printf("Use cgroups to isolate CPU 0\n"); + return; + } + } + } + snprintf(buffer, sizeof(buffer), "/sys/devices/system/cpu/cpu%d/online", cpu); @@ -778,6 +812,7 @@ static void create_cpu_map(void) map.cpu_map[0].logical_cpu); } else { update_punit_cpu_info(map.cpu_map[0].physical_cpu, &cpu_map[i]); + punit_id = cpu_map[i].punit_id; } } cpu_map[i].initialized = 1; @@ -2621,10 +2656,11 @@ static struct process_cmd_struct isst_cmds[] = { */ void parse_cpu_command(char *optarg) { - unsigned int start, end; + unsigned int start, end, invalid_count; char *next; next = optarg; + invalid_count = 0; while (next && *next) { if (*next == '-') /* no negative cpu numbers */ @@ -2634,6 +2670,8 @@ void parse_cpu_command(char *optarg) if (max_target_cpus < MAX_CPUS_IN_ONE_REQ) target_cpus[max_target_cpus++] = start; + else + invalid_count = 1; if (*next == '\0') break; @@ -2660,6 +2698,8 @@ void parse_cpu_command(char *optarg) while (++start <= end) { if (max_target_cpus < MAX_CPUS_IN_ONE_REQ) target_cpus[max_target_cpus++] = start; + else + invalid_count = 1; } if (*next == ',') @@ -2668,6 +2708,13 @@ void parse_cpu_command(char *optarg) goto error; } + if (invalid_count) { + isst_ctdp_display_information_start(outf); + isst_display_error_info_message(1, "Too many CPUs in one request: max is", 1, MAX_CPUS_IN_ONE_REQ - 1); + isst_ctdp_display_information_end(outf); + exit(-1); + } + #ifdef DEBUG { int i; diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c index 0403d42ab1ba..14c9b037859a 100644 --- a/tools/power/x86/intel-speed-select/isst-display.c +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -442,7 +442,7 @@ void isst_ctdp_display_information(struct isst_id *id, FILE *outf, int tdp_level } if (ctdp_level->mem_freq) { - snprintf(header, sizeof(header), "mem-frequency(MHz)"); + snprintf(header, sizeof(header), "max-mem-frequency(MHz)"); snprintf(value, sizeof(value), "%d", ctdp_level->mem_freq); format_and_print(outf, level + 2, header, value); diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h index 54fc21575d56..8def22dec4a2 100644 --- a/tools/power/x86/intel-speed-select/isst.h +++ b/tools/power/x86/intel-speed-select/isst.h @@ -79,7 +79,7 @@ #define DISP_FREQ_MULTIPLIER 100 -#define MAX_PACKAGE_COUNT 8 +#define MAX_PACKAGE_COUNT 32 #define MAX_DIE_PER_PACKAGE 2 #define MAX_PUNIT_PER_DIE 8 diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index 873f3e06ccad..3bf506d4a63c 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -37,3 +37,8 @@ CONFIG_REGMAP_BUILD=y CONFIG_SECURITY=y CONFIG_SECURITY_APPARMOR=y + +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SOC=y +CONFIG_SND_SOC_TOPOLOGY_BUILD=y diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 75ea2081a317..e5da1cad70ba 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -45,6 +45,13 @@ struct rcu_test_struct2 { unsigned long last[RCU_RANGE_COUNT]; }; +struct rcu_test_struct3 { + struct maple_tree *mt; + unsigned long index; + unsigned long last; + bool stop; +}; + struct rcu_reader_struct { unsigned int id; int mod; @@ -34954,6 +34961,70 @@ void run_check_rcu(struct maple_tree *mt, struct rcu_test_struct *vals) MT_BUG_ON(mt, !vals->seen_entry2); } +static void *rcu_slot_store_reader(void *ptr) +{ + struct rcu_test_struct3 *test = ptr; + MA_STATE(mas, test->mt, test->index, test->index); + + rcu_register_thread(); + + rcu_read_lock(); + while (!test->stop) { + mas_walk(&mas); + /* The length of growth to both sides must be equal. */ + RCU_MT_BUG_ON(test, (test->index - mas.index) != + (mas.last - test->last)); + } + rcu_read_unlock(); + + rcu_unregister_thread(); + return NULL; +} + +static noinline void run_check_rcu_slot_store(struct maple_tree *mt) +{ + pthread_t readers[20]; + int range_cnt = 200, i, limit = 10000; + unsigned long len = ULONG_MAX / range_cnt, start, end; + struct rcu_test_struct3 test = {.stop = false, .mt = mt}; + + start = range_cnt / 2 * len; + end = start + len - 1; + test.index = start; + test.last = end; + + for (i = 0; i < range_cnt; i++) { + mtree_store_range(mt, i * len, i * len + len - 1, + xa_mk_value(i * 100), GFP_KERNEL); + } + + mt_set_in_rcu(mt); + MT_BUG_ON(mt, !mt_in_rcu(mt)); + + for (i = 0; i < ARRAY_SIZE(readers); i++) { + if (pthread_create(&readers[i], NULL, rcu_slot_store_reader, + &test)) { + perror("creating reader thread"); + exit(1); + } + } + + usleep(5); + + while (limit--) { + /* Step by step, expand the most middle range to both sides. */ + mtree_store_range(mt, --start, ++end, xa_mk_value(100), + GFP_KERNEL); + } + + test.stop = true; + + while (i--) + pthread_join(readers[i], NULL); + + mt_validate(mt); +} + static noinline void run_check_rcu_slowread(struct maple_tree *mt, struct rcu_test_struct *vals) { @@ -35206,6 +35277,10 @@ static noinline void __init check_rcu_threaded(struct maple_tree *mt) run_check_rcu(mt, &vals); mtree_destroy(mt); + /* Check expanding range in RCU mode */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + run_check_rcu_slot_store(mt); + mtree_destroy(mt); /* Forward writer for rcu stress */ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); @@ -35383,7 +35458,9 @@ static noinline void __init check_prealloc(struct maple_tree *mt) for (i = 0; i <= max; i++) mtree_test_store_range(mt, i * 10, i * 10 + 5, &i); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + /* Spanning store */ + mas_set_range(&mas, 470, 500); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); @@ -35392,105 +35469,108 @@ static noinline void __init check_prealloc(struct maple_tree *mt) allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); mn = mas_pop_node(&mas); MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); mn = mas_pop_node(&mas); MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); mn = mas_pop_node(&mas); MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); mas_push_node(&mas, mn); MT_BUG_ON(mt, mas_allocated(&mas) != allocated); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + /* Slot store does not need allocations */ + mas_set_range(&mas, 6, 9); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); - height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated == 0); - MT_BUG_ON(mt, allocated != 1 + height * 3); + MT_BUG_ON(mt, allocated != 0); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + + mas_set_range(&mas, 6, 10); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated == 0); - MT_BUG_ON(mt, allocated != 1 + height * 3); + MT_BUG_ON(mt, allocated != 1); mas_store_prealloc(&mas, ptr); + MT_BUG_ON(mt, mas_allocated(&mas) != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + /* Split */ + mas_set_range(&mas, 54, 54); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated == 0); - MT_BUG_ON(mt, allocated != 1 + height * 3); + MT_BUG_ON(mt, allocated != 1 + height * 2); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); mt_set_non_kernel(1); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL & GFP_NOWAIT) == 0); + /* Spanning store */ + mas_set_range(&mas, 1, 100); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL & GFP_NOWAIT) == 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated != 0); mas_destroy(&mas); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + /* Spanning store */ + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); + mas_set_range(&mas, 0, 200); mt_set_non_kernel(1); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL & GFP_NOWAIT) == 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL & GFP_NOWAIT) == 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated != 0); diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c index b7eef32addb4..2f5e3c462194 100644 --- a/tools/testing/selftests/alsa/pcm-test.c +++ b/tools/testing/selftests/alsa/pcm-test.c @@ -258,6 +258,8 @@ static void test_pcm_time(struct pcm_data *data, enum test_class class, const char *test_name, snd_config_t *pcm_cfg) { char name[64], key[128], msg[256]; + const int duration_s = 2, margin_ms = 100; + const int duration_ms = duration_s * 1000; const char *cs; int i, err; snd_pcm_t *handle = NULL; @@ -442,7 +444,7 @@ __format: skip = false; timestamp_now(&tstamp); - for (i = 0; i < 4; i++) { + for (i = 0; i < duration_s; i++) { if (data->stream == SND_PCM_STREAM_PLAYBACK) { frames = snd_pcm_writei(handle, samples, rate); if (frames < 0) { @@ -472,8 +474,8 @@ __format: snd_pcm_drain(handle); ms = timestamp_diff_ms(&tstamp); - if (ms < 3900 || ms > 4100) { - snprintf(msg, sizeof(msg), "time mismatch: expected 4000ms got %lld", ms); + if (ms < duration_ms - margin_ms || ms > duration_ms + margin_ms) { + snprintf(msg, sizeof(msg), "time mismatch: expected %dms got %lld", duration_ms, ms); goto __close; } diff --git a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c index a1b139888048..511ac634eef0 100644 --- a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c +++ b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c @@ -15,7 +15,7 @@ long total_entries = 0; #define ENTRY_CNT 32 struct perf_branch_entry entries[ENTRY_CNT] = {}; -static inline bool in_range(__u64 val) +static inline bool gbs_in_range(__u64 val) { return (val >= address_low) && (val < address_high); } @@ -31,7 +31,7 @@ int BPF_PROG(test1, int n, int ret) for (i = 0; i < ENTRY_CNT; i++) { if (i >= total_entries) break; - if (in_range(entries[i].from) && in_range(entries[i].to)) + if (gbs_in_range(entries[i].from) && gbs_in_range(entries[i].to)) test1_hits++; else if (!test1_hits) wasted_entries++; diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index c4a57e69f749..af8c3f30b9c1 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -5,4 +5,6 @@ test_freezer test_kmem test_kill test_cpu +test_cpuset +test_zswap wait_inotify diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 3d263747d2ad..c27f05f6ce9b 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -12,6 +12,8 @@ TEST_GEN_PROGS += test_core TEST_GEN_PROGS += test_freezer TEST_GEN_PROGS += test_kill TEST_GEN_PROGS += test_cpu +TEST_GEN_PROGS += test_cpuset +TEST_GEN_PROGS += test_zswap LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h @@ -23,3 +25,5 @@ $(OUTPUT)/test_core: cgroup_util.c $(OUTPUT)/test_freezer: cgroup_util.c $(OUTPUT)/test_kill: cgroup_util.c $(OUTPUT)/test_cpu: cgroup_util.c +$(OUTPUT)/test_cpuset: cgroup_util.c +$(OUTPUT)/test_zswap: cgroup_util.c diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index e8bbbdb77e0d..0340d4ca8f51 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -286,6 +286,8 @@ int cg_destroy(const char *cgroup) { int ret; + if (!cgroup) + return 0; retry: ret = rmdir(cgroup); if (ret && errno == EBUSY) { diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index c92df4e5d395..1df7f202214a 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -11,6 +11,8 @@ #define USEC_PER_SEC 1000000L #define NSEC_PER_SEC 1000000000L +#define TEST_UID 65534 /* usually nobody, any !root is fine */ + /* * Checks if two given values differ by less than err% of their sum. */ diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 600123503063..80aa6b2373b9 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -683,7 +683,7 @@ cleanup: */ static int test_cgcore_lesser_euid_open(const char *root) { - const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ + const uid_t test_euid = TEST_UID; int ret = KSFT_FAIL; char *cg_test_a = NULL, *cg_test_b = NULL; char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; diff --git a/tools/testing/selftests/cgroup/test_cpuset.c b/tools/testing/selftests/cgroup/test_cpuset.c new file mode 100644 index 000000000000..b061ed1e05b4 --- /dev/null +++ b/tools/testing/selftests/cgroup/test_cpuset.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/limits.h> +#include <signal.h> + +#include "../kselftest.h" +#include "cgroup_util.h" + +static int idle_process_fn(const char *cgroup, void *arg) +{ + (void)pause(); + return 0; +} + +static int do_migration_fn(const char *cgroup, void *arg) +{ + int object_pid = (int)(size_t)arg; + + if (setuid(TEST_UID)) + return EXIT_FAILURE; + + // XXX checking /proc/$pid/cgroup would be quicker than wait + if (cg_enter(cgroup, object_pid) || + cg_wait_for_proc_count(cgroup, 1)) + return EXIT_FAILURE; + + return EXIT_SUCCESS; +} + +static int do_controller_fn(const char *cgroup, void *arg) +{ + const char *child = cgroup; + const char *parent = arg; + + if (setuid(TEST_UID)) + return EXIT_FAILURE; + + if (!cg_read_strstr(child, "cgroup.controllers", "cpuset")) + return EXIT_FAILURE; + + if (cg_write(parent, "cgroup.subtree_control", "+cpuset")) + return EXIT_FAILURE; + + if (cg_read_strstr(child, "cgroup.controllers", "cpuset")) + return EXIT_FAILURE; + + if (cg_write(parent, "cgroup.subtree_control", "-cpuset")) + return EXIT_FAILURE; + + if (!cg_read_strstr(child, "cgroup.controllers", "cpuset")) + return EXIT_FAILURE; + + return EXIT_SUCCESS; +} + +/* + * Migrate a process between two sibling cgroups. + * The success should only depend on the parent cgroup permissions and not the + * migrated process itself (cpuset controller is in place because it uses + * security_task_setscheduler() in cgroup v1). + * + * Deliberately don't set cpuset.cpus in children to avoid definining migration + * permissions between two different cpusets. + */ +static int test_cpuset_perms_object(const char *root, bool allow) +{ + char *parent = NULL, *child_src = NULL, *child_dst = NULL; + char *parent_procs = NULL, *child_src_procs = NULL, *child_dst_procs = NULL; + const uid_t test_euid = TEST_UID; + int object_pid = 0; + int ret = KSFT_FAIL; + + parent = cg_name(root, "cpuset_test_0"); + if (!parent) + goto cleanup; + parent_procs = cg_name(parent, "cgroup.procs"); + if (!parent_procs) + goto cleanup; + if (cg_create(parent)) + goto cleanup; + + child_src = cg_name(parent, "cpuset_test_1"); + if (!child_src) + goto cleanup; + child_src_procs = cg_name(child_src, "cgroup.procs"); + if (!child_src_procs) + goto cleanup; + if (cg_create(child_src)) + goto cleanup; + + child_dst = cg_name(parent, "cpuset_test_2"); + if (!child_dst) + goto cleanup; + child_dst_procs = cg_name(child_dst, "cgroup.procs"); + if (!child_dst_procs) + goto cleanup; + if (cg_create(child_dst)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+cpuset")) + goto cleanup; + + if (cg_read_strstr(child_src, "cgroup.controllers", "cpuset") || + cg_read_strstr(child_dst, "cgroup.controllers", "cpuset")) + goto cleanup; + + /* Enable permissions along src->dst tree path */ + if (chown(child_src_procs, test_euid, -1) || + chown(child_dst_procs, test_euid, -1)) + goto cleanup; + + if (allow && chown(parent_procs, test_euid, -1)) + goto cleanup; + + /* Fork a privileged child as a test object */ + object_pid = cg_run_nowait(child_src, idle_process_fn, NULL); + if (object_pid < 0) + goto cleanup; + + /* Carry out migration in a child process that can drop all privileges + * (including capabilities), the main process must remain privileged for + * cleanup. + * Child process's cgroup is irrelevant but we place it into child_dst + * as hacky way to pass information about migration target to the child. + */ + if (allow ^ (cg_run(child_dst, do_migration_fn, (void *)(size_t)object_pid) == EXIT_SUCCESS)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (object_pid > 0) { + (void)kill(object_pid, SIGTERM); + (void)clone_reap(object_pid, WEXITED); + } + + cg_destroy(child_dst); + free(child_dst_procs); + free(child_dst); + + cg_destroy(child_src); + free(child_src_procs); + free(child_src); + + cg_destroy(parent); + free(parent_procs); + free(parent); + + return ret; +} + +static int test_cpuset_perms_object_allow(const char *root) +{ + return test_cpuset_perms_object(root, true); +} + +static int test_cpuset_perms_object_deny(const char *root) +{ + return test_cpuset_perms_object(root, false); +} + +/* + * Migrate a process between parent and child implicitely + * Implicit migration happens when a controller is enabled/disabled. + * + */ +static int test_cpuset_perms_subtree(const char *root) +{ + char *parent = NULL, *child = NULL; + char *parent_procs = NULL, *parent_subctl = NULL, *child_procs = NULL; + const uid_t test_euid = TEST_UID; + int object_pid = 0; + int ret = KSFT_FAIL; + + parent = cg_name(root, "cpuset_test_0"); + if (!parent) + goto cleanup; + parent_procs = cg_name(parent, "cgroup.procs"); + if (!parent_procs) + goto cleanup; + parent_subctl = cg_name(parent, "cgroup.subtree_control"); + if (!parent_subctl) + goto cleanup; + if (cg_create(parent)) + goto cleanup; + + child = cg_name(parent, "cpuset_test_1"); + if (!child) + goto cleanup; + child_procs = cg_name(child, "cgroup.procs"); + if (!child_procs) + goto cleanup; + if (cg_create(child)) + goto cleanup; + + /* Enable permissions as in a delegated subtree */ + if (chown(parent_procs, test_euid, -1) || + chown(parent_subctl, test_euid, -1) || + chown(child_procs, test_euid, -1)) + goto cleanup; + + /* Put a privileged child in the subtree and modify controller state + * from an unprivileged process, the main process remains privileged + * for cleanup. + * The unprivileged child runs in subtree too to avoid parent and + * internal-node constraing violation. + */ + object_pid = cg_run_nowait(child, idle_process_fn, NULL); + if (object_pid < 0) + goto cleanup; + + if (cg_run(child, do_controller_fn, parent) != EXIT_SUCCESS) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (object_pid > 0) { + (void)kill(object_pid, SIGTERM); + (void)clone_reap(object_pid, WEXITED); + } + + cg_destroy(child); + free(child_procs); + free(child); + + cg_destroy(parent); + free(parent_subctl); + free(parent_procs); + free(parent); + + return ret; +} + + +#define T(x) { x, #x } +struct cpuset_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_cpuset_perms_object_allow), + T(test_cpuset_perms_object_deny), + T(test_cpuset_perms_subtree), +}; +#undef T + +int main(int argc, char *argv[]) +{ + char root[PATH_MAX]; + int i, ret = EXIT_SUCCESS; + + if (cg_find_unified_root(root, sizeof(root))) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "cpuset")) + if (cg_write(root, "cgroup.subtree_control", "+cpuset")) + ksft_exit_skip("Failed to set cpuset controller\n"); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ret = EXIT_FAILURE; + ksft_test_result_fail("%s\n", tests[i].name); + break; + } + } + + return ret; +} diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 2b5215cc599f..4afb132e4e4f 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -10,7 +10,7 @@ skip_test() { echo "$1" echo "Test SKIPPED" - exit 0 + exit 4 # ksft_skip } [[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!" diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index ed2e50bb1e76..c82f974b85c9 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -162,11 +162,11 @@ static int cg_run_in_subcgroups(const char *parent, * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS * threads. Then it checks the sanity of numbers on the parent level: * the total size of the cgroups should be roughly equal to - * anon + file + slab + kernel_stack. + * anon + file + kernel + sock. */ static int test_kmem_memcg_deletion(const char *root) { - long current, slab, anon, file, kernel_stack, pagetables, percpu, sock, sum; + long current, anon, file, kernel, sock, sum; int ret = KSFT_FAIL; char *parent; @@ -184,29 +184,22 @@ static int test_kmem_memcg_deletion(const char *root) goto cleanup; current = cg_read_long(parent, "memory.current"); - slab = cg_read_key_long(parent, "memory.stat", "slab "); anon = cg_read_key_long(parent, "memory.stat", "anon "); file = cg_read_key_long(parent, "memory.stat", "file "); - kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack "); - pagetables = cg_read_key_long(parent, "memory.stat", "pagetables "); - percpu = cg_read_key_long(parent, "memory.stat", "percpu "); + kernel = cg_read_key_long(parent, "memory.stat", "kernel "); sock = cg_read_key_long(parent, "memory.stat", "sock "); - if (current < 0 || slab < 0 || anon < 0 || file < 0 || - kernel_stack < 0 || pagetables < 0 || percpu < 0 || sock < 0) + if (current < 0 || anon < 0 || file < 0 || kernel < 0 || sock < 0) goto cleanup; - sum = slab + anon + file + kernel_stack + pagetables + percpu + sock; + sum = anon + file + kernel + sock; if (abs(sum - current) < MAX_VMSTAT_ERROR) { ret = KSFT_PASS; } else { printf("memory.current = %ld\n", current); - printf("slab + anon + file + kernel_stack = %ld\n", sum); - printf("slab = %ld\n", slab); + printf("anon + file + kernel + sock = %ld\n", sum); printf("anon = %ld\n", anon); printf("file = %ld\n", file); - printf("kernel_stack = %ld\n", kernel_stack); - printf("pagetables = %ld\n", pagetables); - printf("percpu = %ld\n", percpu); + printf("kernel = %ld\n", kernel); printf("sock = %ld\n", sock); } diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c new file mode 100644 index 000000000000..49def87a909b --- /dev/null +++ b/tools/testing/selftests/cgroup/test_zswap.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <linux/limits.h> +#include <unistd.h> +#include <stdio.h> +#include <signal.h> +#include <sys/sysinfo.h> +#include <string.h> +#include <sys/wait.h> +#include <sys/mman.h> + +#include "../kselftest.h" +#include "cgroup_util.h" + +static int read_int(const char *path, size_t *value) +{ + FILE *file; + int ret = 0; + + file = fopen(path, "r"); + if (!file) + return -1; + if (fscanf(file, "%ld", value) != 1) + ret = -1; + fclose(file); + return ret; +} + +static int set_min_free_kb(size_t value) +{ + FILE *file; + int ret; + + file = fopen("/proc/sys/vm/min_free_kbytes", "w"); + if (!file) + return -1; + ret = fprintf(file, "%ld\n", value); + fclose(file); + return ret; +} + +static int read_min_free_kb(size_t *value) +{ + return read_int("/proc/sys/vm/min_free_kbytes", value); +} + +static int get_zswap_stored_pages(size_t *value) +{ + return read_int("/sys/kernel/debug/zswap/stored_pages", value); +} + +static int get_zswap_written_back_pages(size_t *value) +{ + return read_int("/sys/kernel/debug/zswap/written_back_pages", value); +} + +static int allocate_bytes(const char *cgroup, void *arg) +{ + size_t size = (size_t)arg; + char *mem = (char *)malloc(size); + + if (!mem) + return -1; + for (int i = 0; i < size; i += 4095) + mem[i] = 'a'; + free(mem); + return 0; +} + +/* + * When trying to store a memcg page in zswap, if the memcg hits its memory + * limit in zswap, writeback should not be triggered. + * + * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may + * not zswap"). Needs to be revised when a per memcg writeback mechanism is + * implemented. + */ +static int test_no_invasive_cgroup_shrink(const char *root) +{ + size_t written_back_before, written_back_after; + int ret = KSFT_FAIL; + char *test_group; + + /* Set up */ + test_group = cg_name(root, "no_shrink_test"); + if (!test_group) + goto out; + if (cg_create(test_group)) + goto out; + if (cg_write(test_group, "memory.max", "1M")) + goto out; + if (cg_write(test_group, "memory.zswap.max", "10K")) + goto out; + if (get_zswap_written_back_pages(&written_back_before)) + goto out; + + /* Allocate 10x memory.max to push memory into zswap */ + if (cg_run(test_group, allocate_bytes, (void *)MB(10))) + goto out; + + /* Verify that no writeback happened because of the memcg allocation */ + if (get_zswap_written_back_pages(&written_back_after)) + goto out; + if (written_back_after == written_back_before) + ret = KSFT_PASS; +out: + cg_destroy(test_group); + free(test_group); + return ret; +} + +struct no_kmem_bypass_child_args { + size_t target_alloc_bytes; + size_t child_allocated; +}; + +static int no_kmem_bypass_child(const char *cgroup, void *arg) +{ + struct no_kmem_bypass_child_args *values = arg; + void *allocation; + + allocation = malloc(values->target_alloc_bytes); + if (!allocation) { + values->child_allocated = true; + return -1; + } + for (long i = 0; i < values->target_alloc_bytes; i += 4095) + ((char *)allocation)[i] = 'a'; + values->child_allocated = true; + pause(); + free(allocation); + return 0; +} + +/* + * When pages owned by a memcg are pushed to zswap by kswapd, they should be + * charged to that cgroup. This wasn't the case before commit + * cd08d80ecdac("mm: correctly charge compressed memory to its memcg"). + * + * The test first allocates memory in a memcg, then raises min_free_kbytes to + * a very high value so that the allocation falls below low wm, then makes + * another allocation to trigger kswapd that should push the memcg-owned pages + * to zswap and verifies that the zswap pages are correctly charged. + * + * To be run on a VM with at most 4G of memory. + */ +static int test_no_kmem_bypass(const char *root) +{ + size_t min_free_kb_high, min_free_kb_low, min_free_kb_original; + struct no_kmem_bypass_child_args *values; + size_t trigger_allocation_size; + int wait_child_iteration = 0; + long stored_pages_threshold; + struct sysinfo sys_info; + int ret = KSFT_FAIL; + int child_status; + char *test_group; + pid_t child_pid; + + /* Read sys info and compute test values accordingly */ + if (sysinfo(&sys_info) != 0) + return KSFT_FAIL; + if (sys_info.totalram > 5000000000) + return KSFT_SKIP; + values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ | + PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (values == MAP_FAILED) + return KSFT_FAIL; + if (read_min_free_kb(&min_free_kb_original)) + return KSFT_FAIL; + min_free_kb_high = sys_info.totalram / 2000; + min_free_kb_low = sys_info.totalram / 500000; + values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) + + sys_info.totalram * 5 / 100; + stored_pages_threshold = sys_info.totalram / 5 / 4096; + trigger_allocation_size = sys_info.totalram / 20; + + /* Set up test memcg */ + if (cg_write(root, "cgroup.subtree_control", "+memory")) + goto out; + test_group = cg_name(root, "kmem_bypass_test"); + if (!test_group) + goto out; + + /* Spawn memcg child and wait for it to allocate */ + set_min_free_kb(min_free_kb_low); + if (cg_create(test_group)) + goto out; + values->child_allocated = false; + child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values); + if (child_pid < 0) + goto out; + while (!values->child_allocated && wait_child_iteration++ < 10000) + usleep(1000); + + /* Try to wakeup kswapd and let it push child memory to zswap */ + set_min_free_kb(min_free_kb_high); + for (int i = 0; i < 20; i++) { + size_t stored_pages; + char *trigger_allocation = malloc(trigger_allocation_size); + + if (!trigger_allocation) + break; + for (int i = 0; i < trigger_allocation_size; i += 4095) + trigger_allocation[i] = 'b'; + usleep(100000); + free(trigger_allocation); + if (get_zswap_stored_pages(&stored_pages)) + break; + if (stored_pages < 0) + break; + /* If memory was pushed to zswap, verify it belongs to memcg */ + if (stored_pages > stored_pages_threshold) { + int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped "); + int delta = stored_pages * 4096 - zswapped; + int result_ok = delta < stored_pages * 4096 / 4; + + ret = result_ok ? KSFT_PASS : KSFT_FAIL; + break; + } + } + + kill(child_pid, SIGTERM); + waitpid(child_pid, &child_status, 0); +out: + set_min_free_kb(min_free_kb_original); + cg_destroy(test_group); + free(test_group); + return ret; +} + +#define T(x) { x, #x } +struct zswap_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_no_kmem_bypass), + T(test_no_invasive_cgroup_shrink), +}; +#undef T + +static bool zswap_configured(void) +{ + return access("/sys/module/zswap", F_OK) == 0; +} + +int main(int argc, char **argv) +{ + char root[PATH_MAX]; + int i, ret = EXIT_SUCCESS; + + if (cg_find_unified_root(root, sizeof(root))) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + if (!zswap_configured()) + ksft_exit_skip("zswap isn't configured\n"); + + /* + * Check that memory controller is available: + * memory is listed in cgroup.controllers + */ + if (cg_read_strstr(root, "cgroup.controllers", "memory")) + ksft_exit_skip("memory controller isn't available\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) + if (cg_write(root, "cgroup.subtree_control", "+memory")) + ksft_exit_skip("Failed to set memory controller\n"); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ret = EXIT_FAILURE; + ksft_test_result_fail("%s\n", tests[i].name); + break; + } + } + + return ret; +} diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index bcd4734ca094..60a9a305aef0 100644 --- a/tools/testing/selftests/damon/sysfs.sh +++ b/tools/testing/selftests/damon/sysfs.sh @@ -84,6 +84,7 @@ test_tried_regions() { tried_regions_dir=$1 ensure_dir "$tried_regions_dir" "exist" + ensure_file "$tried_regions_dir/total_bytes" "exist" "400" } test_stats() @@ -102,9 +103,14 @@ test_filter() ensure_file "$filter_dir/type" "exist" "600" ensure_write_succ "$filter_dir/type" "anon" "valid input" ensure_write_succ "$filter_dir/type" "memcg" "valid input" + ensure_write_succ "$filter_dir/type" "addr" "valid input" + ensure_write_succ "$filter_dir/type" "target" "valid input" ensure_write_fail "$filter_dir/type" "foo" "invalid input" ensure_file "$filter_dir/matching" "exist" "600" ensure_file "$filter_dir/memcg_path" "exist" "600" + ensure_file "$filter_dir/addr_start" "exist" "600" + ensure_file "$filter_dir/addr_end" "exist" "600" + ensure_file "$filter_dir/damon_target_idx" "exist" "600" } test_filters() diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc index f34b14ef9781..b9c21a81d248 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc @@ -5,6 +5,7 @@ KPROBES= FPROBES= +FIELDS= if grep -qF "p[:[<group>/][<event>]] <place> [<args>]" README ; then KPROBES=yes @@ -12,6 +13,9 @@ fi if grep -qF "f[:[<group>/][<event>]] <func-name>[%return] [<args>]" README ; then FPROBES=yes fi +if grep -qF "<argname>[->field[->field|.field...]]" README ; then + FIELDS=yes +fi if [ -z "$KPROBES" -a -z "$FPROBES" ] ; then exit_unsupported @@ -21,6 +25,9 @@ echo 0 > events/enable echo > dynamic_events TP=kfree +TP2=kmem_cache_alloc +TP3=getname_flags +TP4=sched_wakeup if [ "$FPROBES" ] ; then echo "f:fpevent $TP object" >> dynamic_events @@ -33,6 +40,7 @@ echo > dynamic_events echo "f:fpevent $TP "'$arg1' >> dynamic_events grep -q "fpevent.*object=object" dynamic_events + echo > dynamic_events echo "f:fpevent $TP "'$arg*' >> dynamic_events @@ -45,6 +53,18 @@ fi echo > dynamic_events +if [ "$FIELDS" ] ; then +echo "t:tpevent ${TP2} obj_size=s->object_size" >> dynamic_events +echo "f:fpevent ${TP3}%return path=\$retval->name:string" >> dynamic_events +echo "t:tpevent2 ${TP4} p->se.group_node.next->prev" >> dynamic_events + +grep -q "tpevent .*obj_size=s->object_size" dynamic_events +grep -q "fpevent.*path=\$retval->name:string" dynamic_events +grep -q 'tpevent2 .*p->se.group_node.next->prev' dynamic_events + +echo > dynamic_events +fi + if [ "$KPROBES" ] ; then echo "p:kpevent $TP object" >> dynamic_events grep -q "kpevent.*object=object" dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc index 812f5b3f6055..20e42c030095 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc @@ -30,11 +30,11 @@ check_error 'f:^ vfs_read' # NO_EVENT_NAME check_error 'f:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG check_error 'f:foo/^bar.1 vfs_read' # BAD_EVENT_NAME -check_error 'f vfs_read ^$retval' # RETVAL_ON_PROBE check_error 'f vfs_read ^$stack10000' # BAD_STACK_NUM check_error 'f vfs_read ^$arg10000' # BAD_ARG_NUM +check_error 'f vfs_read $retval ^$arg1' # BAD_VAR check_error 'f vfs_read ^$none_var' # BAD_VAR check_error 'f vfs_read ^'$REG # BAD_VAR @@ -103,6 +103,14 @@ check_error 'f vfs_read%return ^$arg*' # NOFENTRY_ARGS check_error 'f vfs_read ^hoge' # NO_BTFARG check_error 'f kfree ^$arg10' # NO_BTFARG (exceed the number of parameters) check_error 'f kfree%return ^$retval' # NO_RETVAL + +if grep -qF "<argname>[->field[->field|.field...]]" README ; then +check_error 'f vfs_read%return $retval->^foo' # NO_PTR_STRCT +check_error 'f vfs_read file->^foo' # NO_BTF_FIELD +check_error 'f vfs_read file^-.foo' # BAD_HYPHEN +check_error 'f vfs_read ^file:string' # BAD_TYPE4STR +fi + else check_error 'f vfs_read ^$arg*' # NOSUP_BTFARG check_error 't kfree ^$arg*' # NOSUP_BTFARG diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc index 285b4770efad..ff7499eb98d6 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc @@ -34,14 +34,19 @@ mips*) esac : "Test get argument (1)" -echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):char" > kprobe_events +if grep -q eventfs_add_dir available_filter_functions; then + DIR_NAME="eventfs_add_dir" +else + DIR_NAME="tracefs_create_dir" +fi +echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):char" > kprobe_events echo 1 > events/kprobes/testprobe/enable echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1='t'" trace echo 0 > events/kprobes/testprobe/enable : "Test get argument (2)" -echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):char arg2=+0(${ARG1}):char[4]" > kprobe_events +echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):char arg2=+0(${ARG1}):char[4]" > kprobe_events echo 1 > events/kprobes/testprobe/enable echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1='t' arg2={'t','e','s','t'}" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index a4f8e7c53c1f..a202b2ea4baf 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -37,14 +37,19 @@ loongarch*) esac : "Test get argument (1)" -echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events +if grep -q eventfs_add_dir available_filter_functions; then + DIR_NAME="eventfs_add_dir" +else + DIR_NAME="tracefs_create_dir" +fi +echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1=\"test\"" trace echo 0 > events/kprobes/testprobe/enable : "Test get argument (2)" -echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events +echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index e4a6b33cfde4..33d08600be13 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -9,9 +9,6 @@ #include "iommufd_utils.h" -static void *buffer; - -static unsigned long PAGE_SIZE; static unsigned long HUGEPAGE_SIZE; #define MOCK_PAGE_SIZE (PAGE_SIZE / 2) @@ -116,6 +113,7 @@ TEST_F(iommufd, cmd_length) } TEST_LENGTH(iommu_destroy, IOMMU_DESTROY); + TEST_LENGTH(iommu_hw_info, IOMMU_GET_HW_INFO); TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC); TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES); TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS); @@ -188,6 +186,7 @@ FIXTURE(iommufd_ioas) uint32_t ioas_id; uint32_t stdev_id; uint32_t hwpt_id; + uint32_t device_id; uint64_t base_iova; }; @@ -214,7 +213,7 @@ FIXTURE_SETUP(iommufd_ioas) for (i = 0; i != variant->mock_domains; i++) { test_cmd_mock_domain(self->ioas_id, &self->stdev_id, - &self->hwpt_id); + &self->hwpt_id, &self->device_id); self->base_iova = MOCK_APERTURE_START; } } @@ -265,7 +264,7 @@ TEST_F(iommufd_ioas, hwpt_attach) { /* Create a device attached directly to a hwpt */ if (self->stdev_id) { - test_cmd_mock_domain(self->hwpt_id, NULL, NULL); + test_cmd_mock_domain(self->hwpt_id, NULL, NULL, NULL); } else { test_err_mock_domain(ENOENT, self->hwpt_id, NULL, NULL); } @@ -293,6 +292,40 @@ TEST_F(iommufd_ioas, ioas_area_auto_destroy) } } +TEST_F(iommufd_ioas, get_hw_info) +{ + struct iommu_test_hw_info buffer_exact; + struct iommu_test_hw_info_buffer_larger { + struct iommu_test_hw_info info; + uint64_t trailing_bytes; + } buffer_larger; + struct iommu_test_hw_info_buffer_smaller { + __u32 flags; + } buffer_smaller; + + if (self->device_id) { + /* Provide a zero-size user_buffer */ + test_cmd_get_hw_info(self->device_id, NULL, 0); + /* Provide a user_buffer with exact size */ + test_cmd_get_hw_info(self->device_id, &buffer_exact, sizeof(buffer_exact)); + /* + * Provide a user_buffer with size larger than the exact size to check if + * kernel zero the trailing bytes. + */ + test_cmd_get_hw_info(self->device_id, &buffer_larger, sizeof(buffer_larger)); + /* + * Provide a user_buffer with size smaller than the exact size to check if + * the fields within the size range still gets updated. + */ + test_cmd_get_hw_info(self->device_id, &buffer_smaller, sizeof(buffer_smaller)); + } else { + test_err_get_hw_info(ENOENT, self->device_id, + &buffer_exact, sizeof(buffer_exact)); + test_err_get_hw_info(ENOENT, self->device_id, + &buffer_larger, sizeof(buffer_larger)); + } +} + TEST_F(iommufd_ioas, area) { int i; @@ -684,7 +717,7 @@ TEST_F(iommufd_ioas, access_pin) _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), &access_cmd)); test_cmd_mock_domain(self->ioas_id, &mock_stdev_id, - &mock_hwpt_id); + &mock_hwpt_id, NULL); check_map_cmd.id = mock_hwpt_id; ASSERT_EQ(0, ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_MAP), @@ -839,7 +872,7 @@ TEST_F(iommufd_ioas, fork_gone) * If a domain already existed then everything was pinned within * the fork, so this copies from one domain to another. */ - test_cmd_mock_domain(self->ioas_id, NULL, NULL); + test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL); check_access_rw(_metadata, self->fd, access_id, MOCK_APERTURE_START, 0); @@ -888,7 +921,7 @@ TEST_F(iommufd_ioas, fork_present) ASSERT_EQ(8, read(efd, &tmp, sizeof(tmp))); /* Read pages from the remote process */ - test_cmd_mock_domain(self->ioas_id, NULL, NULL); + test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL); check_access_rw(_metadata, self->fd, access_id, MOCK_APERTURE_START, 0); ASSERT_EQ(0, close(pipefds[1])); @@ -1035,6 +1068,8 @@ FIXTURE(iommufd_mock_domain) uint32_t ioas_id; uint32_t hwpt_id; uint32_t hwpt_ids[2]; + uint32_t stdev_ids[2]; + uint32_t idev_ids[2]; int mmap_flags; size_t mmap_buf_size; }; @@ -1056,7 +1091,8 @@ FIXTURE_SETUP(iommufd_mock_domain) ASSERT_GE(ARRAY_SIZE(self->hwpt_ids), variant->mock_domains); for (i = 0; i != variant->mock_domains; i++) - test_cmd_mock_domain(self->ioas_id, NULL, &self->hwpt_ids[i]); + test_cmd_mock_domain(self->ioas_id, &self->stdev_ids[i], + &self->hwpt_ids[i], &self->idev_ids[i]); self->hwpt_id = self->hwpt_ids[0]; self->mmap_flags = MAP_SHARED | MAP_ANONYMOUS; @@ -1250,7 +1286,7 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) /* Add and destroy a domain while the area exists */ old_id = self->hwpt_ids[1]; test_cmd_mock_domain(self->ioas_id, &mock_stdev_id, - &self->hwpt_ids[1]); + &self->hwpt_ids[1], NULL); check_mock_iova(buf + start, iova, length); check_refs(buf + start / PAGE_SIZE * PAGE_SIZE, @@ -1283,7 +1319,13 @@ TEST_F(iommufd_mock_domain, user_copy) .dst_iova = MOCK_APERTURE_START, .length = BUFFER_SIZE, }; - unsigned int ioas_id; + struct iommu_ioas_unmap unmap_cmd = { + .size = sizeof(unmap_cmd), + .ioas_id = self->ioas_id, + .iova = MOCK_APERTURE_START, + .length = BUFFER_SIZE, + }; + unsigned int new_ioas_id, ioas_id; /* Pin the pages in an IOAS with no domains then copy to an IOAS with domains */ test_ioctl_ioas_alloc(&ioas_id); @@ -1301,13 +1343,77 @@ TEST_F(iommufd_mock_domain, user_copy) ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE); + /* Now replace the ioas with a new one */ + test_ioctl_ioas_alloc(&new_ioas_id); + test_ioctl_ioas_map_id(new_ioas_id, buffer, BUFFER_SIZE, + ©_cmd.src_iova); + test_cmd_access_replace_ioas(access_cmd.id, new_ioas_id); + + /* Destroy the old ioas and cleanup copied mapping */ + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_UNMAP, &unmap_cmd)); + test_ioctl_destroy(ioas_id); + + /* Then run the same test again with the new ioas */ + access_cmd.access_pages.iova = copy_cmd.src_iova; + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + copy_cmd.src_ioas_id = new_ioas_id; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); + check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE); + test_cmd_destroy_access_pages( access_cmd.id, access_cmd.access_pages.out_access_pages_id); test_cmd_destroy_access(access_cmd.id); + test_ioctl_destroy(new_ioas_id); +} + +TEST_F(iommufd_mock_domain, replace) +{ + uint32_t ioas_id; + + test_ioctl_ioas_alloc(&ioas_id); + + test_cmd_mock_domain_replace(self->stdev_ids[0], ioas_id); + + /* + * Replacing the IOAS causes the prior HWPT to be deallocated, thus we + * should get enoent when we try to use it. + */ + if (variant->mock_domains == 1) + test_err_mock_domain_replace(ENOENT, self->stdev_ids[0], + self->hwpt_ids[0]); + + test_cmd_mock_domain_replace(self->stdev_ids[0], ioas_id); + if (variant->mock_domains >= 2) { + test_cmd_mock_domain_replace(self->stdev_ids[0], + self->hwpt_ids[1]); + test_cmd_mock_domain_replace(self->stdev_ids[0], + self->hwpt_ids[1]); + test_cmd_mock_domain_replace(self->stdev_ids[0], + self->hwpt_ids[0]); + } + + test_cmd_mock_domain_replace(self->stdev_ids[0], self->ioas_id); test_ioctl_destroy(ioas_id); } +TEST_F(iommufd_mock_domain, alloc_hwpt) +{ + int i; + + for (i = 0; i != variant->mock_domains; i++) { + uint32_t stddev_id; + uint32_t hwpt_id; + + test_cmd_hwpt_alloc(self->idev_ids[0], self->ioas_id, &hwpt_id); + test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); + test_ioctl_destroy(stddev_id); + test_ioctl_destroy(hwpt_id); + } +} + /* VFIO compatibility IOCTLs */ TEST_F(iommufd, simple_ioctls) @@ -1429,7 +1535,7 @@ FIXTURE_SETUP(vfio_compat_mock_domain) /* Create what VFIO would consider a group */ test_ioctl_ioas_alloc(&self->ioas_id); - test_cmd_mock_domain(self->ioas_id, NULL, NULL); + test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL); /* Attach it to the vfio compat */ vfio_ioas_cmd.ioas_id = self->ioas_id; diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index d9afcb23810e..a220ca2a689d 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -41,6 +41,8 @@ static int writeat(int dfd, const char *fn, const char *val) static __attribute__((constructor)) void setup_buffer(void) { + PAGE_SIZE = sysconf(_SC_PAGE_SIZE); + BUFFER_SIZE = 2*1024*1024; buffer = mmap(0, BUFFER_SIZE, PROT_READ | PROT_WRITE, @@ -313,7 +315,7 @@ TEST_FAIL_NTH(basic_fail_nth, map_domain) fail_nth_enable(); - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id)) + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) return -1; if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova, @@ -324,7 +326,7 @@ TEST_FAIL_NTH(basic_fail_nth, map_domain) if (_test_ioctl_destroy(self->fd, stdev_id)) return -1; - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id)) + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) return -1; return 0; } @@ -348,12 +350,13 @@ TEST_FAIL_NTH(basic_fail_nth, map_two_domains) if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) return -1; - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id)) + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) return -1; fail_nth_enable(); - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2)) + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2, + NULL)) return -1; if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova, @@ -367,9 +370,10 @@ TEST_FAIL_NTH(basic_fail_nth, map_two_domains) if (_test_ioctl_destroy(self->fd, stdev_id2)) return -1; - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id)) + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) return -1; - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2)) + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2, + NULL)) return -1; return 0; } @@ -526,7 +530,7 @@ TEST_FAIL_NTH(basic_fail_nth, access_pin_domain) if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) return -1; - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id)) + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) return -1; if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova, @@ -569,4 +573,57 @@ TEST_FAIL_NTH(basic_fail_nth, access_pin_domain) return 0; } +/* device.c */ +TEST_FAIL_NTH(basic_fail_nth, device) +{ + struct iommu_test_hw_info info; + uint32_t ioas_id; + uint32_t ioas_id2; + uint32_t stdev_id; + uint32_t idev_id; + uint32_t hwpt_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id2)) + return -1; + + iova = MOCK_APERTURE_START; + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, PAGE_SIZE, &iova, + IOMMU_IOAS_MAP_FIXED_IOVA | + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + if (_test_ioctl_ioas_map(self->fd, ioas_id2, buffer, PAGE_SIZE, &iova, + IOMMU_IOAS_MAP_FIXED_IOVA | + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + fail_nth_enable(); + + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, NULL, + &idev_id)) + return -1; + + if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info))) + return -1; + + if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, &hwpt_id)) + return -1; + + if (_test_cmd_mock_domain_replace(self->fd, stdev_id, ioas_id2, NULL)) + return -1; + + if (_test_cmd_mock_domain_replace(self->fd, stdev_id, hwpt_id, NULL)) + return -1; + return 0; +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 85d6662ef8e8..e0753d03ecaa 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -19,6 +19,12 @@ static void *buffer; static unsigned long BUFFER_SIZE; +static unsigned long PAGE_SIZE; + +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) + /* * Have the kernel check the refcount on pages. I don't know why a freshly * mmap'd anon non-compound page starts out with a ref of 3 @@ -39,7 +45,7 @@ static unsigned long BUFFER_SIZE; }) static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *stdev_id, - __u32 *hwpt_id) + __u32 *hwpt_id, __u32 *idev_id) { struct iommu_test_cmd cmd = { .size = sizeof(cmd), @@ -57,14 +63,84 @@ static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *stdev_id, assert(cmd.id != 0); if (hwpt_id) *hwpt_id = cmd.mock_domain.out_hwpt_id; + if (idev_id) + *idev_id = cmd.mock_domain.out_idev_id; return 0; } -#define test_cmd_mock_domain(ioas_id, stdev_id, hwpt_id) \ - ASSERT_EQ(0, \ - _test_cmd_mock_domain(self->fd, ioas_id, stdev_id, hwpt_id)) +#define test_cmd_mock_domain(ioas_id, stdev_id, hwpt_id, idev_id) \ + ASSERT_EQ(0, _test_cmd_mock_domain(self->fd, ioas_id, stdev_id, \ + hwpt_id, idev_id)) #define test_err_mock_domain(_errno, ioas_id, stdev_id, hwpt_id) \ EXPECT_ERRNO(_errno, _test_cmd_mock_domain(self->fd, ioas_id, \ - stdev_id, hwpt_id)) + stdev_id, hwpt_id, NULL)) + +static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id, + __u32 *hwpt_id) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE, + .id = stdev_id, + .mock_domain_replace = { + .pt_id = pt_id, + }, + }; + int ret; + + ret = ioctl(fd, IOMMU_TEST_CMD, &cmd); + if (ret) + return ret; + if (hwpt_id) + *hwpt_id = cmd.mock_domain_replace.pt_id; + return 0; +} + +#define test_cmd_mock_domain_replace(stdev_id, pt_id) \ + ASSERT_EQ(0, _test_cmd_mock_domain_replace(self->fd, stdev_id, pt_id, \ + NULL)) +#define test_err_mock_domain_replace(_errno, stdev_id, pt_id) \ + EXPECT_ERRNO(_errno, _test_cmd_mock_domain_replace(self->fd, stdev_id, \ + pt_id, NULL)) + +static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, + __u32 *hwpt_id) +{ + struct iommu_hwpt_alloc cmd = { + .size = sizeof(cmd), + .dev_id = device_id, + .pt_id = pt_id, + }; + int ret; + + ret = ioctl(fd, IOMMU_HWPT_ALLOC, &cmd); + if (ret) + return ret; + if (hwpt_id) + *hwpt_id = cmd.out_hwpt_id; + return 0; +} + +#define test_cmd_hwpt_alloc(device_id, pt_id, hwpt_id) \ + ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, hwpt_id)) + +static int _test_cmd_access_replace_ioas(int fd, __u32 access_id, + unsigned int ioas_id) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_ACCESS_REPLACE_IOAS, + .id = access_id, + .access_replace_ioas = { .ioas_id = ioas_id }, + }; + int ret; + + ret = ioctl(fd, IOMMU_TEST_CMD, &cmd); + if (ret) + return ret; + return 0; +} +#define test_cmd_access_replace_ioas(access_id, ioas_id) \ + ASSERT_EQ(0, _test_cmd_access_replace_ioas(self->fd, access_id, ioas_id)) static int _test_cmd_create_access(int fd, unsigned int ioas_id, __u32 *access_id, unsigned int flags) @@ -276,3 +352,61 @@ static void teardown_iommufd(int fd, struct __test_metadata *_metadata) }) #endif + +/* @data can be NULL */ +static int _test_cmd_get_hw_info(int fd, __u32 device_id, + void *data, size_t data_len) +{ + struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data; + struct iommu_hw_info cmd = { + .size = sizeof(cmd), + .dev_id = device_id, + .data_len = data_len, + .data_uptr = (uint64_t)data, + }; + int ret; + + ret = ioctl(fd, IOMMU_GET_HW_INFO, &cmd); + if (ret) + return ret; + + assert(cmd.out_data_type == IOMMU_HW_INFO_TYPE_SELFTEST); + + /* + * The struct iommu_test_hw_info should be the one defined + * by the current kernel. + */ + assert(cmd.data_len == sizeof(struct iommu_test_hw_info)); + + /* + * Trailing bytes should be 0 if user buffer is larger than + * the data that kernel reports. + */ + if (data_len > cmd.data_len) { + char *ptr = (char *)(data + cmd.data_len); + int idx = 0; + + while (idx < data_len - cmd.data_len) { + assert(!*(ptr + idx)); + idx++; + } + } + + if (info) { + if (data_len >= offsetofend(struct iommu_test_hw_info, test_reg)) + assert(info->test_reg == IOMMU_HW_INFO_SELFTEST_REGVAL); + if (data_len >= offsetofend(struct iommu_test_hw_info, flags)) + assert(!info->flags); + } + + return 0; +} + +#define test_cmd_get_hw_info(device_id, data, data_len) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \ + data, data_len)) + +#define test_err_get_hw_info(_errno, device_id, data, data_len) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_get_hw_info(self->fd, device_id, \ + data, data_len)) diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 829be379545a..529d29a35900 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -113,6 +113,15 @@ static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; } static inline void ksft_print_header(void) { + /* + * Force line buffering; If stdout is not connected to a terminal, it + * will otherwise default to fully buffered, which can cause output + * duplication if there is content in the buffer when fork()ing. If + * there is a crash, line buffering also means the most recent output + * line will be visible. + */ + setvbuf(stdout, NULL, _IOLBF, 0); + if (!(getenv("KSFT_TAP_LEVEL"))) printf("TAP version 13\n"); } diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 1c952d1401d4..261c73cab41b 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -105,15 +105,18 @@ run_one() echo "# Warning: file $TEST is missing!" echo "not ok $test_num $TEST_HDR_MSG" else + if [ -x /usr/bin/stdbuf ]; then + stdbuf="/usr/bin/stdbuf --output=L " + fi eval kselftest_cmd_args="\$${kselftest_cmd_args_ref:-}" - cmd="./$BASENAME_TEST $kselftest_cmd_args" + cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args" if [ ! -x "$TEST" ]; then echo "# Warning: file $TEST is not executable" if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ] then interpreter=$(head -n 1 "$TEST" | cut -c 3-) - cmd="$interpreter ./$BASENAME_TEST" + cmd="$stdbuf $interpreter ./$BASENAME_TEST" else echo "not ok $test_num $TEST_HDR_MSG" return diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index c692cc86e7da..a3bb36fb3cfc 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -23,6 +23,7 @@ LIBKVM += lib/guest_modes.c LIBKVM += lib/io.c LIBKVM += lib/kvm_util.c LIBKVM += lib/memstress.c +LIBKVM += lib/guest_sprintf.c LIBKVM += lib/rbtree.c LIBKVM += lib/sparsebit.c LIBKVM += lib/test_util.c @@ -122,6 +123,7 @@ TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test +TEST_GEN_PROGS_x86_64 += guest_print_test TEST_GEN_PROGS_x86_64 += hardware_disable_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += kvm_page_table_test @@ -140,7 +142,6 @@ TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs TEST_GEN_PROGS_aarch64 += aarch64/arch_timer TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions -TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/hypercalls TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test TEST_GEN_PROGS_aarch64 += aarch64/psci_test @@ -152,6 +153,8 @@ TEST_GEN_PROGS_aarch64 += access_tracking_perf_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_perf_test +TEST_GEN_PROGS_aarch64 += guest_print_test +TEST_GEN_PROGS_aarch64 += get-reg-list TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus TEST_GEN_PROGS_aarch64 += kvm_page_table_test TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test @@ -166,8 +169,10 @@ TEST_GEN_PROGS_s390x += s390x/resets TEST_GEN_PROGS_s390x += s390x/sync_regs_test TEST_GEN_PROGS_s390x += s390x/tprot TEST_GEN_PROGS_s390x += s390x/cmma_test +TEST_GEN_PROGS_s390x += s390x/debug_test TEST_GEN_PROGS_s390x += demand_paging_test TEST_GEN_PROGS_s390x += dirty_log_test +TEST_GEN_PROGS_s390x += guest_print_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus TEST_GEN_PROGS_s390x += kvm_page_table_test TEST_GEN_PROGS_s390x += rseq_test @@ -176,11 +181,15 @@ TEST_GEN_PROGS_s390x += kvm_binary_stats_test TEST_GEN_PROGS_riscv += demand_paging_test TEST_GEN_PROGS_riscv += dirty_log_test +TEST_GEN_PROGS_riscv += guest_print_test +TEST_GEN_PROGS_riscv += get-reg-list TEST_GEN_PROGS_riscv += kvm_create_max_vcpus TEST_GEN_PROGS_riscv += kvm_page_table_test TEST_GEN_PROGS_riscv += set_memory_region_test TEST_GEN_PROGS_riscv += kvm_binary_stats_test +SPLIT_TESTS += get-reg-list + TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR)) TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR)) TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR)) @@ -204,6 +213,7 @@ endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -Wno-gnu-variable-sized-type-not-at-end -MD\ -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \ + -fno-builtin-strnlen \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \ @@ -228,11 +238,14 @@ LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) +SPLIT_TESTS_TARGETS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS)) +SPLIT_TESTS_OBJS := $(patsubst %, $(ARCH_DIR)/%.o, $(SPLIT_TESTS)) TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS)) TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED)) TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ)) TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS)) +TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TESTS_OBJS)) -include $(TEST_DEP_FILES) $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): %: %.o @@ -240,7 +253,10 @@ $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): %: %.o $(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ -EXTRA_CLEAN += $(LIBKVM_OBJS) $(TEST_DEP_FILES) $(TEST_GEN_OBJ) cscope.* +$(SPLIT_TESTS_TARGETS): %: %.o $(SPLIT_TESTS_OBJS) + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@ + +EXTRA_CLEAN += $(LIBKVM_OBJS) $(TEST_DEP_FILES) $(TEST_GEN_OBJ) $(SPLIT_TESTS_OBJS) cscope.* x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c index 4951ac53d1f8..b90580840b22 100644 --- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c @@ -98,7 +98,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) uint64_t val; vcpu_get_reg(vcpu, reg_id, &val); - ASSERT_EQ(val, 0); + TEST_ASSERT_EQ(val, 0); /* * Expect the ioctl to succeed with no effect on the register @@ -107,7 +107,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); vcpu_get_reg(vcpu, reg_id, &val); - ASSERT_EQ(val, 0); + TEST_ASSERT_EQ(val, 0); } } @@ -127,14 +127,14 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu) uint64_t val; vcpu_get_reg(vcpu, reg_id, &val); - ASSERT_EQ(val, 0); + TEST_ASSERT_EQ(val, 0); r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); TEST_ASSERT(r < 0 && errno == EINVAL, "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); vcpu_get_reg(vcpu, reg_id, &val); - ASSERT_EQ(val, 0); + TEST_ASSERT_EQ(val, 0); } } diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 8ef370924a02..274b8465b42a 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -19,7 +19,6 @@ * * Copyright (c) 2021, Google LLC. */ - #define _GNU_SOURCE #include <stdlib.h> @@ -155,11 +154,13 @@ static void guest_validate_irq(unsigned int intid, xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt); /* Make sure we are dealing with the correct timer IRQ */ - GUEST_ASSERT_2(intid == timer_irq, intid, timer_irq); + GUEST_ASSERT_EQ(intid, timer_irq); /* Basic 'timer condition met' check */ - GUEST_ASSERT_3(xcnt >= cval, xcnt, cval, xcnt_diff_us); - GUEST_ASSERT_1(xctl & CTL_ISTATUS, xctl); + __GUEST_ASSERT(xcnt >= cval, + "xcnt = 0x%llx, cval = 0x%llx, xcnt_diff_us = 0x%llx", + xcnt, cval, xcnt_diff_us); + __GUEST_ASSERT(xctl & CTL_ISTATUS, "xcnt = 0x%llx", xcnt); WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1); } @@ -192,8 +193,7 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data, TIMER_TEST_ERR_MARGIN_US); irq_iter = READ_ONCE(shared_data->nr_iter); - GUEST_ASSERT_2(config_iter + 1 == irq_iter, - config_iter + 1, irq_iter); + GUEST_ASSERT_EQ(config_iter + 1, irq_iter); } } @@ -243,13 +243,9 @@ static void *test_vcpu_run(void *arg) break; case UCALL_ABORT: sync_global_from_guest(vm, *shared_data); - REPORT_GUEST_ASSERT_N(uc, "values: %lu, %lu; %lu, vcpu %u; stage; %u; iter: %u", - GUEST_ASSERT_ARG(uc, 0), - GUEST_ASSERT_ARG(uc, 1), - GUEST_ASSERT_ARG(uc, 2), - vcpu_idx, - shared_data->guest_stage, - shared_data->nr_iter); + fprintf(stderr, "Guest assert failed, vcpu %u; stage; %u; iter: %u\n", + vcpu_idx, shared_data->guest_stage, shared_data->nr_iter); + REPORT_GUEST_ASSERT(uc); break; default: TEST_FAIL("Unexpected guest exit\n"); diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 637be796086f..f5b6cb3a0019 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -365,7 +365,7 @@ static void guest_wp_handler(struct ex_regs *regs) static void guest_ss_handler(struct ex_regs *regs) { - GUEST_ASSERT_1(ss_idx < 4, ss_idx); + __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%u'", ss_idx); ss_addr[ss_idx++] = regs->pc; regs->pstate |= SPSR_SS; } @@ -410,8 +410,8 @@ static void guest_code_ss(int test_cnt) /* Userspace disables Single Step when the end is nigh. */ asm volatile("iter_ss_end:\n"); - GUEST_ASSERT(bvr == w_bvr); - GUEST_ASSERT(wvr == w_wvr); + GUEST_ASSERT_EQ(bvr, w_bvr); + GUEST_ASSERT_EQ(wvr, w_wvr); } GUEST_DONE(); } @@ -450,7 +450,7 @@ static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bp vcpu_run(vcpu); switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 4f10055af2aa..709d7d721760 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -4,50 +4,17 @@ * * Copyright (C) 2020, Red Hat, Inc. * - * When attempting to migrate from a host with an older kernel to a host - * with a newer kernel we allow the newer kernel on the destination to - * list new registers with get-reg-list. We assume they'll be unused, at - * least until the guest reboots, and so they're relatively harmless. - * However, if the destination host with the newer kernel is missing - * registers which the source host with the older kernel has, then that's - * a regression in get-reg-list. This test checks for that regression by - * checking the current list against a blessed list. We should never have - * missing registers, but if new ones appear then they can probably be - * added to the blessed list. A completely new blessed list can be created - * by running the test with the --list command line argument. - * - * Note, the blessed list should be created from the oldest possible - * kernel. We can't go older than v4.15, though, because that's the first - * release to expose the ID system registers in KVM_GET_REG_LIST, see - * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features - * from guests"). Also, one must use the --core-reg-fixup command line - * option when running on an older kernel that doesn't include df205b5c6328 - * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST") + * While the blessed list should be created from the oldest possible + * kernel, we can't go older than v5.2, though, because that's the first + * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid + * core register IDs in KVM_GET_REG_LIST"). Without that commit the core + * registers won't match expectations. */ #include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/wait.h> #include "kvm_util.h" #include "test_util.h" #include "processor.h" -static struct kvm_reg_list *reg_list; -static __u64 *blessed_reg, blessed_n; - -struct reg_sublist { - const char *name; - long capability; - int feature; - bool finalize; - __u64 *regs; - __u64 regs_n; - __u64 *rejects_set; - __u64 rejects_set_n; -}; - struct feature_id_reg { __u64 reg; __u64 id_reg; @@ -76,70 +43,7 @@ static struct feature_id_reg feat_id_regs[] = { } }; -struct vcpu_config { - char *name; - struct reg_sublist sublists[]; -}; - -static struct vcpu_config *vcpu_configs[]; -static int vcpu_configs_n; - -#define for_each_sublist(c, s) \ - for ((s) = &(c)->sublists[0]; (s)->regs; ++(s)) - -#define for_each_reg(i) \ - for ((i) = 0; (i) < reg_list->n; ++(i)) - -#define for_each_reg_filtered(i) \ - for_each_reg(i) \ - if (!filter_reg(reg_list->reg[i])) - -#define for_each_missing_reg(i) \ - for ((i) = 0; (i) < blessed_n; ++(i)) \ - if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) \ - if (check_supported_feat_reg(vcpu, blessed_reg[i])) - -#define for_each_new_reg(i) \ - for_each_reg_filtered(i) \ - if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) - -static const char *config_name(struct vcpu_config *c) -{ - struct reg_sublist *s; - int len = 0; - - if (c->name) - return c->name; - - for_each_sublist(c, s) - len += strlen(s->name) + 1; - - c->name = malloc(len); - - len = 0; - for_each_sublist(c, s) { - if (!strcmp(s->name, "base")) - continue; - strcat(c->name + len, s->name); - len += strlen(s->name) + 1; - c->name[len - 1] = '+'; - } - c->name[len - 1] = '\0'; - - return c->name; -} - -static bool has_cap(struct vcpu_config *c, long capability) -{ - struct reg_sublist *s; - - for_each_sublist(c, s) - if (s->capability == capability) - return true; - return false; -} - -static bool filter_reg(__u64 reg) +bool filter_reg(__u64 reg) { /* * DEMUX register presence depends on the host's CLIDR_EL1. @@ -151,16 +55,6 @@ static bool filter_reg(__u64 reg) return false; } -static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) -{ - int i; - - for (i = 0; i < nr_regs; ++i) - if (reg == regs[i]) - return true; - return false; -} - static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg) { int i, ret; @@ -180,17 +74,27 @@ static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg) return true; } -static const char *str_with_index(const char *template, __u64 index) +bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg) { - char *str, *p; - int n; + return check_supported_feat_reg(vcpu, reg); +} - str = strdup(template); - p = strstr(str, "##"); - n = sprintf(p, "%lld", index); - strcat(p + n, strstr(template, "##") + 2); +bool check_reject_set(int err) +{ + return err == EPERM; +} - return (const char *)str; +void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) +{ + struct vcpu_reg_sublist *s; + int feature; + + for_each_sublist(c, s) { + if (s->finalize) { + feature = s->feature; + vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature); + } + } } #define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) @@ -199,7 +103,7 @@ static const char *str_with_index(const char *template, __u64 index) #define CORE_SPSR_XX_NR_WORDS 2 #define CORE_FPREGS_XX_NR_WORDS 4 -static const char *core_id_to_str(struct vcpu_config *c, __u64 id) +static const char *core_id_to_str(const char *prefix, __u64 id) { __u64 core_off = id & ~REG_MASK, idx; @@ -210,8 +114,8 @@ static const char *core_id_to_str(struct vcpu_config *c, __u64 id) case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... KVM_REG_ARM_CORE_REG(regs.regs[30]): idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS; - TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", config_name(c), idx); - return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx); + TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx); + return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx); case KVM_REG_ARM_CORE_REG(regs.sp): return "KVM_REG_ARM_CORE_REG(regs.sp)"; case KVM_REG_ARM_CORE_REG(regs.pc): @@ -225,24 +129,24 @@ static const char *core_id_to_str(struct vcpu_config *c, __u64 id) case KVM_REG_ARM_CORE_REG(spsr[0]) ... KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS; - TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", config_name(c), idx); - return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx); + TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx); + return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx); case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS; - TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", config_name(c), idx); - return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx); + TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx); + return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx); case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)"; case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)"; } - TEST_FAIL("%s: Unknown core reg id: 0x%llx", config_name(c), id); + TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id); return NULL; } -static const char *sve_id_to_str(struct vcpu_config *c, __u64 id) +static const char *sve_id_to_str(const char *prefix, __u64 id) { __u64 sve_off, n, i; @@ -252,37 +156,37 @@ static const char *sve_id_to_str(struct vcpu_config *c, __u64 id) sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1)); i = id & (KVM_ARM64_SVE_MAX_SLICES - 1); - TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", config_name(c), id); + TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id); switch (sve_off) { case KVM_REG_ARM64_SVE_ZREG_BASE ... KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1: n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1); TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0), - "%s: Unexpected bits set in SVE ZREG id: 0x%llx", config_name(c), id); - return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n); + "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id); + return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n); case KVM_REG_ARM64_SVE_PREG_BASE ... KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1: n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1); TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0), - "%s: Unexpected bits set in SVE PREG id: 0x%llx", config_name(c), id); - return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n); + "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id); + return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n); case KVM_REG_ARM64_SVE_FFR_BASE: TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0), - "%s: Unexpected bits set in SVE FFR id: 0x%llx", config_name(c), id); + "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id); return "KVM_REG_ARM64_SVE_FFR(0)"; } return NULL; } -static void print_reg(struct vcpu_config *c, __u64 id) +void print_reg(const char *prefix, __u64 id) { unsigned op0, op1, crn, crm, op2; const char *reg_size = NULL; TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64, - "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", config_name(c), id); + "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id); switch (id & KVM_REG_SIZE_MASK) { case KVM_REG_SIZE_U8: @@ -314,16 +218,16 @@ static void print_reg(struct vcpu_config *c, __u64 id) break; default: TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx", - config_name(c), (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); + prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); } switch (id & KVM_REG_ARM_COPROC_MASK) { case KVM_REG_ARM_CORE: - printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(c, id)); + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id)); break; case KVM_REG_ARM_DEMUX: TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)), - "%s: Unexpected bits set in DEMUX reg id: 0x%llx", config_name(c), id); + "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id); printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n", reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK); break; @@ -334,370 +238,34 @@ static void print_reg(struct vcpu_config *c, __u64 id) crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT; op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT; TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2), - "%s: Unexpected bits set in SYSREG reg id: 0x%llx", config_name(c), id); + "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id); printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2); break; case KVM_REG_ARM_FW: TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff), - "%s: Unexpected bits set in FW reg id: 0x%llx", config_name(c), id); + "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id); printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); break; case KVM_REG_ARM_FW_FEAT_BMAP: TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff), - "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", config_name(c), id); + "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id); printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff); break; case KVM_REG_ARM64_SVE: - if (has_cap(c, KVM_CAP_ARM_SVE)) - printf("\t%s,\n", sve_id_to_str(c, id)); - else - TEST_FAIL("%s: KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", config_name(c), id); + printf("\t%s,\n", sve_id_to_str(prefix, id)); break; default: TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx", - config_name(c), (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); - } -} - -/* - * Older kernels listed each 32-bit word of CORE registers separately. - * For 64 and 128-bit registers we need to ignore the extra words. We - * also need to fixup the sizes, because the older kernels stated all - * registers were 64-bit, even when they weren't. - */ -static void core_reg_fixup(void) -{ - struct kvm_reg_list *tmp; - __u64 id, core_off; - int i; - - tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64)); - - for (i = 0; i < reg_list->n; ++i) { - id = reg_list->reg[i]; - - if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) { - tmp->reg[tmp->n++] = id; - continue; - } - - core_off = id & ~REG_MASK; - - switch (core_off) { - case 0x52: case 0xd2: case 0xd6: - /* - * These offsets are pointing at padding. - * We need to ignore them too. - */ - continue; - case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... - KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): - if (core_off & 3) - continue; - id &= ~KVM_REG_SIZE_MASK; - id |= KVM_REG_SIZE_U128; - tmp->reg[tmp->n++] = id; - continue; - case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): - case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): - id &= ~KVM_REG_SIZE_MASK; - id |= KVM_REG_SIZE_U32; - tmp->reg[tmp->n++] = id; - continue; - default: - if (core_off & 1) - continue; - tmp->reg[tmp->n++] = id; - break; - } + prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); } - - free(reg_list); - reg_list = tmp; -} - -static void prepare_vcpu_init(struct vcpu_config *c, struct kvm_vcpu_init *init) -{ - struct reg_sublist *s; - - for_each_sublist(c, s) - if (s->capability) - init->features[s->feature / 32] |= 1 << (s->feature % 32); -} - -static void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_config *c) -{ - struct reg_sublist *s; - int feature; - - for_each_sublist(c, s) { - if (s->finalize) { - feature = s->feature; - vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature); - } - } -} - -static void check_supported(struct vcpu_config *c) -{ - struct reg_sublist *s; - - for_each_sublist(c, s) { - if (!s->capability) - continue; - - __TEST_REQUIRE(kvm_has_cap(s->capability), - "%s: %s not available, skipping tests\n", - config_name(c), s->name); - } -} - -static bool print_list; -static bool print_filtered; -static bool fixup_core_regs; - -static void run_test(struct vcpu_config *c) -{ - struct kvm_vcpu_init init = { .target = -1, }; - int new_regs = 0, missing_regs = 0, i, n; - int failed_get = 0, failed_set = 0, failed_reject = 0; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct reg_sublist *s; - - check_supported(c); - - vm = vm_create_barebones(); - prepare_vcpu_init(c, &init); - vcpu = __vm_vcpu_add(vm, 0); - aarch64_vcpu_setup(vcpu, &init); - finalize_vcpu(vcpu, c); - - reg_list = vcpu_get_reg_list(vcpu); - - if (fixup_core_regs) - core_reg_fixup(); - - if (print_list || print_filtered) { - putchar('\n'); - for_each_reg(i) { - __u64 id = reg_list->reg[i]; - if ((print_list && !filter_reg(id)) || - (print_filtered && filter_reg(id))) - print_reg(c, id); - } - putchar('\n'); - return; - } - - /* - * We only test that we can get the register and then write back the - * same value. Some registers may allow other values to be written - * back, but others only allow some bits to be changed, and at least - * for ID registers set will fail if the value does not exactly match - * what was returned by get. If registers that allow other values to - * be written need to have the other values tested, then we should - * create a new set of tests for those in a new independent test - * executable. - */ - for_each_reg(i) { - uint8_t addr[2048 / 8]; - struct kvm_one_reg reg = { - .id = reg_list->reg[i], - .addr = (__u64)&addr, - }; - bool reject_reg = false; - int ret; - - ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr); - if (ret) { - printf("%s: Failed to get ", config_name(c)); - print_reg(c, reg.id); - putchar('\n'); - ++failed_get; - } - - /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */ - for_each_sublist(c, s) { - if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) { - reject_reg = true; - ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); - if (ret != -1 || errno != EPERM) { - printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno); - print_reg(c, reg.id); - putchar('\n'); - ++failed_reject; - } - break; - } - } - - if (!reject_reg) { - ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); - if (ret) { - printf("%s: Failed to set ", config_name(c)); - print_reg(c, reg.id); - putchar('\n'); - ++failed_set; - } - } - } - - for_each_sublist(c, s) - blessed_n += s->regs_n; - blessed_reg = calloc(blessed_n, sizeof(__u64)); - - n = 0; - for_each_sublist(c, s) { - for (i = 0; i < s->regs_n; ++i) - blessed_reg[n++] = s->regs[i]; - } - - for_each_new_reg(i) - ++new_regs; - - for_each_missing_reg(i) - ++missing_regs; - - if (new_regs || missing_regs) { - n = 0; - for_each_reg_filtered(i) - ++n; - - printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n); - printf("%s: Number registers: %5lld (includes %lld filtered registers)\n", - config_name(c), reg_list->n, reg_list->n - n); - } - - if (new_regs) { - printf("\n%s: There are %d new registers.\n" - "Consider adding them to the blessed reg " - "list with the following lines:\n\n", config_name(c), new_regs); - for_each_new_reg(i) - print_reg(c, reg_list->reg[i]); - putchar('\n'); - } - - if (missing_regs) { - printf("\n%s: There are %d missing registers.\n" - "The following lines are missing registers:\n\n", config_name(c), missing_regs); - for_each_missing_reg(i) - print_reg(c, blessed_reg[i]); - putchar('\n'); - } - - TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject, - "%s: There are %d missing registers; " - "%d registers failed get; %d registers failed set; %d registers failed reject", - config_name(c), missing_regs, failed_get, failed_set, failed_reject); - - pr_info("%s: PASS\n", config_name(c)); - blessed_n = 0; - free(blessed_reg); - free(reg_list); - kvm_vm_free(vm); -} - -static void help(void) -{ - struct vcpu_config *c; - int i; - - printf( - "\n" - "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered] [--core-reg-fixup]\n\n" - " --config=<selection> Used to select a specific vcpu configuration for the test/listing\n" - " '<selection>' may be\n"); - - for (i = 0; i < vcpu_configs_n; ++i) { - c = vcpu_configs[i]; - printf( - " '%s'\n", config_name(c)); - } - - printf( - "\n" - " --list Print the register list rather than test it (requires --config)\n" - " --list-filtered Print registers that would normally be filtered out (requires --config)\n" - " --core-reg-fixup Needed when running on old kernels with broken core reg listings\n" - "\n" - ); -} - -static struct vcpu_config *parse_config(const char *config) -{ - struct vcpu_config *c; - int i; - - if (config[8] != '=') - help(), exit(1); - - for (i = 0; i < vcpu_configs_n; ++i) { - c = vcpu_configs[i]; - if (strcmp(config_name(c), &config[9]) == 0) - break; - } - - if (i == vcpu_configs_n) - help(), exit(1); - - return c; -} - -int main(int ac, char **av) -{ - struct vcpu_config *c, *sel = NULL; - int i, ret = 0; - pid_t pid; - - for (i = 1; i < ac; ++i) { - if (strcmp(av[i], "--core-reg-fixup") == 0) - fixup_core_regs = true; - else if (strncmp(av[i], "--config", 8) == 0) - sel = parse_config(av[i]); - else if (strcmp(av[i], "--list") == 0) - print_list = true; - else if (strcmp(av[i], "--list-filtered") == 0) - print_filtered = true; - else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0) - help(), exit(0); - else - help(), exit(1); - } - - if (print_list || print_filtered) { - /* - * We only want to print the register list of a single config. - */ - if (!sel) - help(), exit(1); - } - - for (i = 0; i < vcpu_configs_n; ++i) { - c = vcpu_configs[i]; - if (sel && c != sel) - continue; - - pid = fork(); - - if (!pid) { - run_test(c); - exit(0); - } else { - int wstatus; - pid_t wpid = wait(&wstatus); - TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return"); - if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP) - ret = KSFT_FAIL; - } - } - - return ret; } /* - * The current blessed list was primed with the output of kernel version + * The original blessed list was primed with the output of kernel version * v4.15 with --core-reg-fixup and then later updated with new registers. + * (The --core-reg-fixup option and it's fixup function have been removed + * from the test, as it's unlikely to use this type of test on a kernel + * older than v5.2.) * * The blessed list is up to date with kernel version v6.4 (or so we hope) */ @@ -1130,14 +698,14 @@ static __u64 pauth_generic_regs[] = { .regs_n = ARRAY_SIZE(pauth_generic_regs), \ } -static struct vcpu_config vregs_config = { +static struct vcpu_reg_list vregs_config = { .sublists = { BASE_SUBLIST, VREGS_SUBLIST, {0}, }, }; -static struct vcpu_config vregs_pmu_config = { +static struct vcpu_reg_list vregs_pmu_config = { .sublists = { BASE_SUBLIST, VREGS_SUBLIST, @@ -1145,14 +713,14 @@ static struct vcpu_config vregs_pmu_config = { {0}, }, }; -static struct vcpu_config sve_config = { +static struct vcpu_reg_list sve_config = { .sublists = { BASE_SUBLIST, SVE_SUBLIST, {0}, }, }; -static struct vcpu_config sve_pmu_config = { +static struct vcpu_reg_list sve_pmu_config = { .sublists = { BASE_SUBLIST, SVE_SUBLIST, @@ -1160,7 +728,7 @@ static struct vcpu_config sve_pmu_config = { {0}, }, }; -static struct vcpu_config pauth_config = { +static struct vcpu_reg_list pauth_config = { .sublists = { BASE_SUBLIST, VREGS_SUBLIST, @@ -1168,7 +736,7 @@ static struct vcpu_config pauth_config = { {0}, }, }; -static struct vcpu_config pauth_pmu_config = { +static struct vcpu_reg_list pauth_pmu_config = { .sublists = { BASE_SUBLIST, VREGS_SUBLIST, @@ -1178,7 +746,7 @@ static struct vcpu_config pauth_pmu_config = { }, }; -static struct vcpu_config *vcpu_configs[] = { +struct vcpu_reg_list *vcpu_configs[] = { &vregs_config, &vregs_pmu_config, &sve_config, @@ -1186,4 +754,4 @@ static struct vcpu_config *vcpu_configs[] = { &pauth_config, &pauth_pmu_config, }; -static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); +int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c index bef1499fb465..31f66ba97228 100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c @@ -8,7 +8,6 @@ * hypercalls are properly masked or unmasked to the guest when disabled or * enabled from the KVM userspace, respectively. */ - #include <errno.h> #include <linux/arm-smccc.h> #include <asm/kvm.h> @@ -105,15 +104,17 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info) switch (stage) { case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: case TEST_STAGE_HVC_IFACE_FALSE_INFO: - GUEST_ASSERT_3(res.a0 == SMCCC_RET_NOT_SUPPORTED, - res.a0, hc_info->func_id, hc_info->arg1); + __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED, + "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u", + res.a0, hc_info->func_id, hc_info->arg1, stage); break; case TEST_STAGE_HVC_IFACE_FEAT_ENABLED: - GUEST_ASSERT_3(res.a0 != SMCCC_RET_NOT_SUPPORTED, - res.a0, hc_info->func_id, hc_info->arg1); + __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED, + "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u", + res.a0, hc_info->func_id, hc_info->arg1, stage); break; default: - GUEST_ASSERT_1(0, stage); + GUEST_FAIL("Unexpected stage = %u", stage); } } } @@ -132,7 +133,7 @@ static void guest_code(void) guest_test_hvc(false_hvc_info); break; default: - GUEST_ASSERT_1(0, stage); + GUEST_FAIL("Unexpected stage = %u", stage); } GUEST_SYNC(stage); @@ -290,10 +291,7 @@ static void test_run(void) guest_done = true; break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_N(uc, "values: 0x%lx, 0x%lx; 0x%lx, stage: %u", - GUEST_ASSERT_ARG(uc, 0), - GUEST_ASSERT_ARG(uc, 1), - GUEST_ASSERT_ARG(uc, 2), stage); + REPORT_GUEST_ASSERT(uc); break; default: TEST_FAIL("Unexpected guest exit\n"); diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index df10f1ffa20d..47bb914ab2fa 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -7,7 +7,6 @@ * hugetlbfs with a hole). It checks that the expected handling method is * called (e.g., uffd faults with the right address and write/read flag). */ - #define _GNU_SOURCE #include <linux/bitmap.h> #include <fcntl.h> @@ -293,12 +292,12 @@ static void guest_code(struct test_desc *test) static void no_dabt_handler(struct ex_regs *regs) { - GUEST_ASSERT_1(false, read_sysreg(far_el1)); + GUEST_FAIL("Unexpected dabt, far_el1 = 0x%llx", read_sysreg(far_el1)); } static void no_iabt_handler(struct ex_regs *regs) { - GUEST_ASSERT_1(false, regs->pc); + GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc); } static struct uffd_args { @@ -318,7 +317,7 @@ static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING, "The only expected UFFD mode is MISSING"); - ASSERT_EQ(addr, (uint64_t)args->hva); + TEST_ASSERT_EQ(addr, (uint64_t)args->hva); pr_debug("uffd fault: addr=%p write=%d\n", (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE)); @@ -432,7 +431,7 @@ static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run) region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); hva = (void *)region->region.userspace_addr; - ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr); + TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr); memcpy(hva, run->mmio.data, run->mmio.len); events.mmio_exits += 1; @@ -631,9 +630,9 @@ static void setup_default_handlers(struct test_desc *test) static void check_event_counts(struct test_desc *test) { - ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults); - ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits); - ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs); + TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults); + TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits); + TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs); } static void print_test_banner(enum vm_guest_mode mode, struct test_params *p) @@ -679,7 +678,7 @@ static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu, } break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index 90d854e0fcff..2e64b4856e38 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -7,7 +7,6 @@ * host to inject a specific intid via a GUEST_SYNC call, and then checks that * it received it. */ - #include <asm/kvm.h> #include <asm/kvm_para.h> #include <sys/eventfd.h> @@ -781,7 +780,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) run_guest_cmd(vcpu, gic_fd, &inject_args, &args); break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c new file mode 100644 index 000000000000..be7bf5224434 --- /dev/null +++ b/tools/testing/selftests/kvm/get-reg-list.c @@ -0,0 +1,401 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check for KVM_GET_REG_LIST regressions. + * + * Copyright (C) 2020, Red Hat, Inc. + * + * When attempting to migrate from a host with an older kernel to a host + * with a newer kernel we allow the newer kernel on the destination to + * list new registers with get-reg-list. We assume they'll be unused, at + * least until the guest reboots, and so they're relatively harmless. + * However, if the destination host with the newer kernel is missing + * registers which the source host with the older kernel has, then that's + * a regression in get-reg-list. This test checks for that regression by + * checking the current list against a blessed list. We should never have + * missing registers, but if new ones appear then they can probably be + * added to the blessed list. A completely new blessed list can be created + * by running the test with the --list command line argument. + * + * The blessed list should be created from the oldest possible kernel. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include "kvm_util.h" +#include "test_util.h" +#include "processor.h" + +static struct kvm_reg_list *reg_list; +static __u64 *blessed_reg, blessed_n; + +extern struct vcpu_reg_list *vcpu_configs[]; +extern int vcpu_configs_n; + +#define for_each_reg(i) \ + for ((i) = 0; (i) < reg_list->n; ++(i)) + +#define for_each_reg_filtered(i) \ + for_each_reg(i) \ + if (!filter_reg(reg_list->reg[i])) + +#define for_each_missing_reg(i) \ + for ((i) = 0; (i) < blessed_n; ++(i)) \ + if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) \ + if (check_supported_reg(vcpu, blessed_reg[i])) + +#define for_each_new_reg(i) \ + for_each_reg_filtered(i) \ + if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) + +#define for_each_present_blessed_reg(i) \ + for_each_reg(i) \ + if (find_reg(blessed_reg, blessed_n, reg_list->reg[i])) + +static const char *config_name(struct vcpu_reg_list *c) +{ + struct vcpu_reg_sublist *s; + int len = 0; + + if (c->name) + return c->name; + + for_each_sublist(c, s) + len += strlen(s->name) + 1; + + c->name = malloc(len); + + len = 0; + for_each_sublist(c, s) { + if (!strcmp(s->name, "base")) + continue; + strcat(c->name + len, s->name); + len += strlen(s->name) + 1; + c->name[len - 1] = '+'; + } + c->name[len - 1] = '\0'; + + return c->name; +} + +bool __weak check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg) +{ + return true; +} + +bool __weak filter_reg(__u64 reg) +{ + return false; +} + +static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) +{ + int i; + + for (i = 0; i < nr_regs; ++i) + if (reg == regs[i]) + return true; + return false; +} + +void __weak print_reg(const char *prefix, __u64 id) +{ + printf("\t0x%llx,\n", id); +} + +bool __weak check_reject_set(int err) +{ + return true; +} + +void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) +{ +} + +#ifdef __aarch64__ +static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *init) +{ + struct vcpu_reg_sublist *s; + + for_each_sublist(c, s) + if (s->capability) + init->features[s->feature / 32] |= 1 << (s->feature % 32); +} + +static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm) +{ + struct kvm_vcpu_init init = { .target = -1, }; + struct kvm_vcpu *vcpu; + + prepare_vcpu_init(c, &init); + vcpu = __vm_vcpu_add(vm, 0); + aarch64_vcpu_setup(vcpu, &init); + + return vcpu; +} +#else +static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm) +{ + return __vm_vcpu_add(vm, 0); +} +#endif + +static void check_supported(struct vcpu_reg_list *c) +{ + struct vcpu_reg_sublist *s; + + for_each_sublist(c, s) { + if (!s->capability) + continue; + + __TEST_REQUIRE(kvm_has_cap(s->capability), + "%s: %s not available, skipping tests\n", + config_name(c), s->name); + } +} + +static bool print_list; +static bool print_filtered; + +static void run_test(struct vcpu_reg_list *c) +{ + int new_regs = 0, missing_regs = 0, i, n; + int failed_get = 0, failed_set = 0, failed_reject = 0; + int skipped_set = 0; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct vcpu_reg_sublist *s; + + check_supported(c); + + vm = vm_create_barebones(); + vcpu = vcpu_config_get_vcpu(c, vm); + finalize_vcpu(vcpu, c); + + reg_list = vcpu_get_reg_list(vcpu); + + if (print_list || print_filtered) { + putchar('\n'); + for_each_reg(i) { + __u64 id = reg_list->reg[i]; + if ((print_list && !filter_reg(id)) || + (print_filtered && filter_reg(id))) + print_reg(config_name(c), id); + } + putchar('\n'); + return; + } + + for_each_sublist(c, s) + blessed_n += s->regs_n; + blessed_reg = calloc(blessed_n, sizeof(__u64)); + + n = 0; + for_each_sublist(c, s) { + for (i = 0; i < s->regs_n; ++i) + blessed_reg[n++] = s->regs[i]; + } + + /* + * We only test that we can get the register and then write back the + * same value. Some registers may allow other values to be written + * back, but others only allow some bits to be changed, and at least + * for ID registers set will fail if the value does not exactly match + * what was returned by get. If registers that allow other values to + * be written need to have the other values tested, then we should + * create a new set of tests for those in a new independent test + * executable. + * + * Only do the get/set tests on present, blessed list registers, + * since we don't know the capabilities of any new registers. + */ + for_each_present_blessed_reg(i) { + uint8_t addr[2048 / 8]; + struct kvm_one_reg reg = { + .id = reg_list->reg[i], + .addr = (__u64)&addr, + }; + bool reject_reg = false, skip_reg = false; + int ret; + + ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr); + if (ret) { + printf("%s: Failed to get ", config_name(c)); + print_reg(config_name(c), reg.id); + putchar('\n'); + ++failed_get; + } + + for_each_sublist(c, s) { + /* rejects_set registers are rejected for set operation */ + if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) { + reject_reg = true; + ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); + if (ret != -1 || !check_reject_set(errno)) { + printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno); + print_reg(config_name(c), reg.id); + putchar('\n'); + ++failed_reject; + } + break; + } + + /* skips_set registers are skipped for set operation */ + if (s->skips_set && find_reg(s->skips_set, s->skips_set_n, reg.id)) { + skip_reg = true; + ++skipped_set; + break; + } + } + + if (!reject_reg && !skip_reg) { + ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); + if (ret) { + printf("%s: Failed to set ", config_name(c)); + print_reg(config_name(c), reg.id); + putchar('\n'); + ++failed_set; + } + } + } + + for_each_new_reg(i) + ++new_regs; + + for_each_missing_reg(i) + ++missing_regs; + + if (new_regs || missing_regs) { + n = 0; + for_each_reg_filtered(i) + ++n; + + printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n); + printf("%s: Number registers: %5lld (includes %lld filtered registers)\n", + config_name(c), reg_list->n, reg_list->n - n); + } + + if (new_regs) { + printf("\n%s: There are %d new registers.\n" + "Consider adding them to the blessed reg " + "list with the following lines:\n\n", config_name(c), new_regs); + for_each_new_reg(i) + print_reg(config_name(c), reg_list->reg[i]); + putchar('\n'); + } + + if (missing_regs) { + printf("\n%s: There are %d missing registers.\n" + "The following lines are missing registers:\n\n", config_name(c), missing_regs); + for_each_missing_reg(i) + print_reg(config_name(c), blessed_reg[i]); + putchar('\n'); + } + + TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject, + "%s: There are %d missing registers; %d registers failed get; " + "%d registers failed set; %d registers failed reject; %d registers skipped set", + config_name(c), missing_regs, failed_get, failed_set, failed_reject, skipped_set); + + pr_info("%s: PASS\n", config_name(c)); + blessed_n = 0; + free(blessed_reg); + free(reg_list); + kvm_vm_free(vm); +} + +static void help(void) +{ + struct vcpu_reg_list *c; + int i; + + printf( + "\n" + "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered]\n\n" + " --config=<selection> Used to select a specific vcpu configuration for the test/listing\n" + " '<selection>' may be\n"); + + for (i = 0; i < vcpu_configs_n; ++i) { + c = vcpu_configs[i]; + printf( + " '%s'\n", config_name(c)); + } + + printf( + "\n" + " --list Print the register list rather than test it (requires --config)\n" + " --list-filtered Print registers that would normally be filtered out (requires --config)\n" + "\n" + ); +} + +static struct vcpu_reg_list *parse_config(const char *config) +{ + struct vcpu_reg_list *c = NULL; + int i; + + if (config[8] != '=') + help(), exit(1); + + for (i = 0; i < vcpu_configs_n; ++i) { + c = vcpu_configs[i]; + if (strcmp(config_name(c), &config[9]) == 0) + break; + } + + if (i == vcpu_configs_n) + help(), exit(1); + + return c; +} + +int main(int ac, char **av) +{ + struct vcpu_reg_list *c, *sel = NULL; + int i, ret = 0; + pid_t pid; + + for (i = 1; i < ac; ++i) { + if (strncmp(av[i], "--config", 8) == 0) + sel = parse_config(av[i]); + else if (strcmp(av[i], "--list") == 0) + print_list = true; + else if (strcmp(av[i], "--list-filtered") == 0) + print_filtered = true; + else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0) + help(), exit(0); + else + help(), exit(1); + } + + if (print_list || print_filtered) { + /* + * We only want to print the register list of a single config. + */ + if (!sel) + help(), exit(1); + } + + for (i = 0; i < vcpu_configs_n; ++i) { + c = vcpu_configs[i]; + if (sel && c != sel) + continue; + + pid = fork(); + + if (!pid) { + run_test(c); + exit(0); + } else { + int wstatus; + pid_t wpid = wait(&wstatus); + TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return"); + if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP) + ret = KSFT_FAIL; + } + } + + return ret; +} diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c new file mode 100644 index 000000000000..41230b746190 --- /dev/null +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A test for GUEST_PRINTF + * + * Copyright 2022, Google, Inc. and/or its affiliates. + */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +struct guest_vals { + uint64_t a; + uint64_t b; + uint64_t type; +}; + +static struct guest_vals vals; + +/* GUEST_PRINTF()/GUEST_ASSERT_FMT() does not support float or double. */ +#define TYPE_LIST \ +TYPE(test_type_i64, I64, "%ld", int64_t) \ +TYPE(test_type_u64, U64u, "%lu", uint64_t) \ +TYPE(test_type_x64, U64x, "0x%lx", uint64_t) \ +TYPE(test_type_X64, U64X, "0x%lX", uint64_t) \ +TYPE(test_type_u32, U32u, "%u", uint32_t) \ +TYPE(test_type_x32, U32x, "0x%x", uint32_t) \ +TYPE(test_type_X32, U32X, "0x%X", uint32_t) \ +TYPE(test_type_int, INT, "%d", int) \ +TYPE(test_type_char, CHAR, "%c", char) \ +TYPE(test_type_str, STR, "'%s'", const char *) \ +TYPE(test_type_ptr, PTR, "%p", uintptr_t) + +enum args_type { +#define TYPE(fn, ext, fmt_t, T) TYPE_##ext, + TYPE_LIST +#undef TYPE +}; + +static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, + const char *expected_assert); + +#define BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T) \ +const char *PRINTF_FMT_##ext = "Got params a = " fmt_t " and b = " fmt_t; \ +const char *ASSERT_FMT_##ext = "Expected " fmt_t ", got " fmt_t " instead"; \ +static void fn(struct kvm_vcpu *vcpu, T a, T b) \ +{ \ + char expected_printf[UCALL_BUFFER_LEN]; \ + char expected_assert[UCALL_BUFFER_LEN]; \ + \ + snprintf(expected_printf, UCALL_BUFFER_LEN, PRINTF_FMT_##ext, a, b); \ + snprintf(expected_assert, UCALL_BUFFER_LEN, ASSERT_FMT_##ext, a, b); \ + vals = (struct guest_vals){ (uint64_t)a, (uint64_t)b, TYPE_##ext }; \ + sync_global_to_guest(vcpu->vm, vals); \ + run_test(vcpu, expected_printf, expected_assert); \ +} + +#define TYPE(fn, ext, fmt_t, T) \ + BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T) + TYPE_LIST +#undef TYPE + +static void guest_code(void) +{ + while (1) { + switch (vals.type) { +#define TYPE(fn, ext, fmt_t, T) \ + case TYPE_##ext: \ + GUEST_PRINTF(PRINTF_FMT_##ext, vals.a, vals.b); \ + __GUEST_ASSERT(vals.a == vals.b, \ + ASSERT_FMT_##ext, vals.a, vals.b); \ + break; + TYPE_LIST +#undef TYPE + default: + GUEST_SYNC(vals.type); + } + + GUEST_DONE(); + } +} + +/* + * Unfortunately this gets a little messy because 'assert_msg' doesn't + * just contains the matching string, it also contains additional assert + * info. Fortunately the part that matches should be at the very end of + * 'assert_msg'. + */ +static void ucall_abort(const char *assert_msg, const char *expected_assert_msg) +{ + int len_str = strlen(assert_msg); + int len_substr = strlen(expected_assert_msg); + int offset = len_str - len_substr; + + TEST_ASSERT(len_substr <= len_str, + "Expected '%s' to be a substring of '%s'\n", + assert_msg, expected_assert_msg); + + TEST_ASSERT(strcmp(&assert_msg[offset], expected_assert_msg) == 0, + "Unexpected mismatch. Expected: '%s', got: '%s'", + expected_assert_msg, &assert_msg[offset]); +} + +static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, + const char *expected_assert) +{ + struct kvm_run *run = vcpu->run; + struct ucall uc; + + while (1) { + vcpu_run(vcpu); + + TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + TEST_FAIL("Unknown 'args_type' = %lu", uc.args[1]); + break; + case UCALL_PRINTF: + TEST_ASSERT(strcmp(uc.buffer, expected_printf) == 0, + "Unexpected mismatch. Expected: '%s', got: '%s'", + expected_printf, uc.buffer); + break; + case UCALL_ABORT: + ucall_abort(uc.buffer, expected_assert); + break; + case UCALL_DONE: + return; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } +} + +static void guest_code_limits(void) +{ + char test_str[UCALL_BUFFER_LEN + 10]; + + memset(test_str, 'a', sizeof(test_str)); + test_str[sizeof(test_str) - 1] = 0; + + GUEST_PRINTF("%s", test_str); +} + +static void test_limits(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + struct ucall uc; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits); + run = vcpu->run; + vcpu_run(vcpu); + + TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, exit_reason_str(run->exit_reason)); + + TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_ABORT, + "Unexpected ucall command: %lu, Expected: %u (UCALL_ABORT)\n", + uc.cmd, UCALL_ABORT); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + test_type_i64(vcpu, -1, -1); + test_type_i64(vcpu, -1, 1); + test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef); + test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee); + + test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef); + test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee); + test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef); + test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee); + test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef); + test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee); + + test_type_u32(vcpu, 0x90abcdef, 0x90abcdef); + test_type_u32(vcpu, 0x90abcdef, 0x90abcdee); + test_type_x32(vcpu, 0x90abcdef, 0x90abcdef); + test_type_x32(vcpu, 0x90abcdef, 0x90abcdee); + test_type_X32(vcpu, 0x90abcdef, 0x90abcdef); + test_type_X32(vcpu, 0x90abcdef, 0x90abcdee); + + test_type_int(vcpu, -1, -1); + test_type_int(vcpu, -1, 1); + test_type_int(vcpu, 1, 1); + + test_type_char(vcpu, 'a', 'a'); + test_type_char(vcpu, 'a', 'A'); + test_type_char(vcpu, 'a', 'b'); + + test_type_str(vcpu, "foo", "foo"); + test_type_str(vcpu, "foo", "bar"); + + test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdef); + test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdee); + + kvm_vm_free(vm); + + test_limits(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h index cb7c03de3a21..b3e97525cb55 100644 --- a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h +++ b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h @@ -41,7 +41,7 @@ static inline uint64_t timer_get_cntct(enum arch_timer timer) case PHYSICAL: return read_sysreg(cntpct_el0); default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } /* We should not reach here */ @@ -58,7 +58,7 @@ static inline void timer_set_cval(enum arch_timer timer, uint64_t cval) write_sysreg(cval, cntp_cval_el0); break; default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } isb(); @@ -72,7 +72,7 @@ static inline uint64_t timer_get_cval(enum arch_timer timer) case PHYSICAL: return read_sysreg(cntp_cval_el0); default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } /* We should not reach here */ @@ -89,7 +89,7 @@ static inline void timer_set_tval(enum arch_timer timer, uint32_t tval) write_sysreg(tval, cntp_tval_el0); break; default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } isb(); @@ -105,7 +105,7 @@ static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl) write_sysreg(ctl, cntp_ctl_el0); break; default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } isb(); @@ -119,7 +119,7 @@ static inline uint32_t timer_get_ctl(enum arch_timer timer) case PHYSICAL: return read_sysreg(cntp_ctl_el0); default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } /* We should not reach here */ diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/aarch64/ucall.h new file mode 100644 index 000000000000..4b68f37efd36 --- /dev/null +++ b/tools/testing/selftests/kvm/include/aarch64/ucall.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util_base.h" + +#define UCALL_EXIT_REASON KVM_EXIT_MMIO + +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ +extern vm_vaddr_t *ucall_exit_mmio_addr; + +static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + WRITE_ONCE(*ucall_exit_mmio_addr, uc); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index eb1ff597bcca..a18db6a7b3cf 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/kvm.h> #include "linux/rbtree.h" +#include <linux/types.h> #include <asm/atomic.h> @@ -124,6 +125,26 @@ struct kvm_vm { uint32_t memslots[NR_MEM_REGIONS]; }; +struct vcpu_reg_sublist { + const char *name; + long capability; + int feature; + bool finalize; + __u64 *regs; + __u64 regs_n; + __u64 *rejects_set; + __u64 rejects_set_n; + __u64 *skips_set; + __u64 skips_set_n; +}; + +struct vcpu_reg_list { + char *name; + struct vcpu_reg_sublist sublists[]; +}; + +#define for_each_sublist(c, s) \ + for ((s) = &(c)->sublists[0]; (s)->regs; ++(s)) #define kvm_for_each_vcpu(vm, i, vcpu) \ for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \ diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index d00d213c3805..5b62a3d2aa9b 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -38,6 +38,9 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx, KVM_REG_RISCV_TIMER_REG(name), \ KVM_REG_SIZE_U64) +#define RISCV_ISA_EXT_REG(idx) __kvm_reg_id(KVM_REG_RISCV_ISA_EXT, \ + idx, KVM_REG_SIZE_ULONG) + /* L3 index Bit[47:39] */ #define PGTBL_L3_INDEX_MASK 0x0000FF8000000000ULL #define PGTBL_L3_INDEX_SHIFT 39 diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h new file mode 100644 index 000000000000..be46eb32ec27 --- /dev/null +++ b/tools/testing/selftests/kvm/include/riscv/ucall.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "processor.h" + +#define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI + +static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ +} + +static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, + KVM_RISCV_SELFTESTS_SBI_UCALL, + uc, 0, 0, 0, 0, 0); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390x/ucall.h new file mode 100644 index 000000000000..b231bf2e49d6 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390x/ucall.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util_base.h" + +#define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC + +static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ +} + +static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ + asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index a6e9f215ce70..7e614adc6cf4 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -53,14 +53,13 @@ void test_assert(bool exp, const char *exp_str, #define TEST_ASSERT(e, fmt, ...) \ test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__) -#define ASSERT_EQ(a, b) do { \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - TEST_ASSERT(__a == __b, \ - "ASSERT_EQ(%s, %s) failed.\n" \ - "\t%s is %#lx\n" \ - "\t%s is %#lx", \ - #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \ +#define TEST_ASSERT_EQ(a, b) \ +do { \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + test_assert(__a == __b, #a " == " #b, __FILE__, __LINE__, \ + "%#lx != %#lx (%s != %s)", \ + (unsigned long)(__a), (unsigned long)(__b), #a, #b);\ } while (0) #define TEST_ASSERT_KVM_EXIT_REASON(vcpu, expected) do { \ @@ -186,4 +185,9 @@ static inline uint32_t atoi_non_negative(const char *name, const char *num_str) return num; } +int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args); +int guest_snprintf(char *buf, int n, const char *fmt, ...); + +char *strdup_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2), nonnull(1))); + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index 1a6aaef5ccae..112bc1da732a 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -7,21 +7,25 @@ #ifndef SELFTEST_KVM_UCALL_COMMON_H #define SELFTEST_KVM_UCALL_COMMON_H #include "test_util.h" +#include "ucall.h" /* Common ucalls */ enum { UCALL_NONE, UCALL_SYNC, UCALL_ABORT, + UCALL_PRINTF, UCALL_DONE, UCALL_UNHANDLED, }; #define UCALL_MAX_ARGS 7 +#define UCALL_BUFFER_LEN 1024 struct ucall { uint64_t cmd; uint64_t args[UCALL_MAX_ARGS]; + char buffer[UCALL_BUFFER_LEN]; /* Host virtual address of this struct. */ struct ucall *hva; @@ -32,8 +36,12 @@ void ucall_arch_do_ucall(vm_vaddr_t uc); void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); void ucall(uint64_t cmd, int nargs, ...); +void ucall_fmt(uint64_t cmd, const char *fmt, ...); +void ucall_assert(uint64_t cmd, const char *exp, const char *file, + unsigned int line, const char *fmt, ...); uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); +int ucall_nr_pages_required(uint64_t page_size); /* * Perform userspace call without any associated data. This bare call avoids @@ -46,8 +54,11 @@ void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) #define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) +#define GUEST_PRINTF(_fmt, _args...) ucall_fmt(UCALL_PRINTF, _fmt, ##_args) #define GUEST_DONE() ucall(UCALL_DONE, 0) +#define REPORT_GUEST_PRINTF(ucall) pr_info("%s", (ucall).buffer) + enum guest_assert_builtin_args { GUEST_ERROR_STRING, GUEST_FILE, @@ -55,70 +66,41 @@ enum guest_assert_builtin_args { GUEST_ASSERT_BUILTIN_NARGS }; -#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) \ -do { \ - if (!(_condition)) \ - ucall(UCALL_ABORT, GUEST_ASSERT_BUILTIN_NARGS + _nargs, \ - "Failed guest assert: " _condstr, \ - __FILE__, __LINE__, ##_args); \ +#define ____GUEST_ASSERT(_condition, _exp, _fmt, _args...) \ +do { \ + if (!(_condition)) \ + ucall_assert(UCALL_ABORT, _exp, __FILE__, __LINE__, _fmt, ##_args); \ } while (0) -#define GUEST_ASSERT(_condition) \ - __GUEST_ASSERT(_condition, #_condition, 0, 0) - -#define GUEST_ASSERT_1(_condition, arg1) \ - __GUEST_ASSERT(_condition, #_condition, 1, (arg1)) - -#define GUEST_ASSERT_2(_condition, arg1, arg2) \ - __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2)) - -#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \ - __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3)) - -#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \ - __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4)) - -#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b) +#define __GUEST_ASSERT(_condition, _fmt, _args...) \ + ____GUEST_ASSERT(_condition, #_condition, _fmt, ##_args) -#define __REPORT_GUEST_ASSERT(_ucall, fmt, _args...) \ - TEST_FAIL("%s at %s:%ld\n" fmt, \ - (const char *)(_ucall).args[GUEST_ERROR_STRING], \ - (const char *)(_ucall).args[GUEST_FILE], \ - (_ucall).args[GUEST_LINE], \ - ##_args) +#define GUEST_ASSERT(_condition) \ + __GUEST_ASSERT(_condition, #_condition) -#define GUEST_ASSERT_ARG(ucall, i) ((ucall).args[GUEST_ASSERT_BUILTIN_NARGS + i]) +#define GUEST_FAIL(_fmt, _args...) \ + ucall_assert(UCALL_ABORT, "Unconditional guest failure", \ + __FILE__, __LINE__, _fmt, ##_args) -#define REPORT_GUEST_ASSERT(ucall) \ - __REPORT_GUEST_ASSERT((ucall), "") - -#define REPORT_GUEST_ASSERT_1(ucall, fmt) \ - __REPORT_GUEST_ASSERT((ucall), \ - fmt, \ - GUEST_ASSERT_ARG((ucall), 0)) - -#define REPORT_GUEST_ASSERT_2(ucall, fmt) \ - __REPORT_GUEST_ASSERT((ucall), \ - fmt, \ - GUEST_ASSERT_ARG((ucall), 0), \ - GUEST_ASSERT_ARG((ucall), 1)) - -#define REPORT_GUEST_ASSERT_3(ucall, fmt) \ - __REPORT_GUEST_ASSERT((ucall), \ - fmt, \ - GUEST_ASSERT_ARG((ucall), 0), \ - GUEST_ASSERT_ARG((ucall), 1), \ - GUEST_ASSERT_ARG((ucall), 2)) +#define GUEST_ASSERT_EQ(a, b) \ +do { \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + ____GUEST_ASSERT(__a == __b, #a " == " #b, "%#lx != %#lx (%s != %s)", \ + (unsigned long)(__a), (unsigned long)(__b), #a, #b); \ +} while (0) -#define REPORT_GUEST_ASSERT_4(ucall, fmt) \ - __REPORT_GUEST_ASSERT((ucall), \ - fmt, \ - GUEST_ASSERT_ARG((ucall), 0), \ - GUEST_ASSERT_ARG((ucall), 1), \ - GUEST_ASSERT_ARG((ucall), 2), \ - GUEST_ASSERT_ARG((ucall), 3)) +#define GUEST_ASSERT_NE(a, b) \ +do { \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + ____GUEST_ASSERT(__a != __b, #a " != " #b, "%#lx == %#lx (%s == %s)", \ + (unsigned long)(__a), (unsigned long)(__b), #a, #b); \ +} while (0) -#define REPORT_GUEST_ASSERT_N(ucall, fmt, args...) \ - __REPORT_GUEST_ASSERT((ucall), fmt, ##args) +#define REPORT_GUEST_ASSERT(ucall) \ + test_assert(false, (const char *)(ucall).args[GUEST_ERROR_STRING], \ + (const char *)(ucall).args[GUEST_FILE], \ + (ucall).args[GUEST_LINE], "%s", (ucall).buffer) #endif /* SELFTEST_KVM_UCALL_COMMON_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index aa434c8f19c5..4fd042112526 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -239,7 +239,12 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31) #define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7) #define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15) +#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23) #define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31) +#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7) +#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31) +#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4) +#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12) #define X86_PROPERTY_SUPPORTED_XCR0_LO KVM_X86_CPU_PROPERTY(0xd, 0, EAX, 0, 31) #define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31) diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86_64/ucall.h new file mode 100644 index 000000000000..06b244bd06ee --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86_64/ucall.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util_base.h" + +#define UCALL_EXIT_REASON KVM_EXIT_IO + +static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ +} + +#endif diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index b3b00be1ef82..69f26d80c821 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -200,7 +200,7 @@ static void *vcpu_worker(void *data) if (READ_ONCE(host_quit)) return NULL; - clock_gettime(CLOCK_MONOTONIC_RAW, &start); + clock_gettime(CLOCK_MONOTONIC, &start); ret = _vcpu_run(vcpu); ts_diff = timespec_elapsed(start); @@ -367,7 +367,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Test the stage of KVM creating mappings */ *current_stage = KVM_CREATE_MAPPINGS; - clock_gettime(CLOCK_MONOTONIC_RAW, &start); + clock_gettime(CLOCK_MONOTONIC, &start); vcpus_complete_new_stage(*current_stage); ts_diff = timespec_elapsed(start); @@ -380,7 +380,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) *current_stage = KVM_UPDATE_MAPPINGS; - clock_gettime(CLOCK_MONOTONIC_RAW, &start); + clock_gettime(CLOCK_MONOTONIC, &start); vcpus_complete_new_stage(*current_stage); ts_diff = timespec_elapsed(start); @@ -392,7 +392,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) *current_stage = KVM_ADJUST_MAPPINGS; - clock_gettime(CLOCK_MONOTONIC_RAW, &start); + clock_gettime(CLOCK_MONOTONIC, &start); vcpus_complete_new_stage(*current_stage); ts_diff = timespec_elapsed(start); diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index f212bd8ab93d..ddab0ce89d4d 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -6,11 +6,7 @@ */ #include "kvm_util.h" -/* - * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each - * VM), it must not be accessed from host code. - */ -static vm_vaddr_t *ucall_exit_mmio_addr; +vm_vaddr_t *ucall_exit_mmio_addr; void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { @@ -23,11 +19,6 @@ void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva); } -void ucall_arch_do_ucall(vm_vaddr_t uc) -{ - WRITE_ONCE(*ucall_exit_mmio_addr, uc); -} - void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c new file mode 100644 index 000000000000..c4a69d8aeb68 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "ucall_common.h" + +#define APPEND_BUFFER_SAFE(str, end, v) \ +do { \ + GUEST_ASSERT(str < end); \ + *str++ = (v); \ +} while (0) + +static int isdigit(int ch) +{ + return (ch >= '0') && (ch <= '9'); +} + +static int skip_atoi(const char **s) +{ + int i = 0; + + while (isdigit(**s)) + i = i * 10 + *((*s)++) - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SMALL 32 /* Must be 32 == 0x20 */ +#define SPECIAL 64 /* 0x */ + +#define __do_div(n, base) \ +({ \ + int __res; \ + \ + __res = ((uint64_t) n) % (uint32_t) base; \ + n = ((uint64_t) n) / (uint32_t) base; \ + __res; \ +}) + +static char *number(char *str, const char *end, long num, int base, int size, + int precision, int type) +{ + /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ + static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ + + char tmp[66]; + char c, sign, locase; + int i; + + /* + * locase = 0 or 0x20. ORing digits or letters with 'locase' + * produces same digits or (maybe lowercased) letters + */ + locase = (type & SMALL); + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 16) + return NULL; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++] = '0'; + else + while (num != 0) + tmp[i++] = (digits[__do_div(num, base)] | locase); + if (i > precision) + precision = i; + size -= precision; + if (!(type & (ZEROPAD + LEFT))) + while (size-- > 0) + APPEND_BUFFER_SAFE(str, end, ' '); + if (sign) + APPEND_BUFFER_SAFE(str, end, sign); + if (type & SPECIAL) { + if (base == 8) + APPEND_BUFFER_SAFE(str, end, '0'); + else if (base == 16) { + APPEND_BUFFER_SAFE(str, end, '0'); + APPEND_BUFFER_SAFE(str, end, 'x'); + } + } + if (!(type & LEFT)) + while (size-- > 0) + APPEND_BUFFER_SAFE(str, end, c); + while (i < precision--) + APPEND_BUFFER_SAFE(str, end, '0'); + while (i-- > 0) + APPEND_BUFFER_SAFE(str, end, tmp[i]); + while (size-- > 0) + APPEND_BUFFER_SAFE(str, end, ' '); + + return str; +} + +int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args) +{ + char *str, *end; + const char *s; + uint64_t num; + int i, base; + int len; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* + * min. # of digits for integers; max + * number of chars for from string + */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + + end = buf + n; + GUEST_ASSERT(buf < end); + GUEST_ASSERT(n > 0); + + for (str = buf; *fmt; ++fmt) { + if (*fmt != '%') { + APPEND_BUFFER_SAFE(str, end, *fmt); + continue; + } + + /* process flags */ + flags = 0; +repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': + flags |= LEFT; + goto repeat; + case '+': + flags |= PLUS; + goto repeat; + case ' ': + flags |= SPACE; + goto repeat; + case '#': + flags |= SPECIAL; + goto repeat; + case '0': + flags |= ZEROPAD; + goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + APPEND_BUFFER_SAFE(str, end, ' '); + APPEND_BUFFER_SAFE(str, end, + (uint8_t)va_arg(args, int)); + while (--field_width > 0) + APPEND_BUFFER_SAFE(str, end, ' '); + continue; + + case 's': + s = va_arg(args, char *); + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + APPEND_BUFFER_SAFE(str, end, ' '); + for (i = 0; i < len; ++i) + APPEND_BUFFER_SAFE(str, end, *s++); + while (len < field_width--) + APPEND_BUFFER_SAFE(str, end, ' '); + continue; + + case 'p': + if (field_width == -1) { + field_width = 2 * sizeof(void *); + flags |= SPECIAL | SMALL | ZEROPAD; + } + str = number(str, end, + (uint64_t)va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + case 'n': + if (qualifier == 'l') { + long *ip = va_arg(args, long *); + *ip = (str - buf); + } else { + int *ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + APPEND_BUFFER_SAFE(str, end, '%'); + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'x': + flags |= SMALL; + case 'X': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + APPEND_BUFFER_SAFE(str, end, '%'); + if (*fmt) + APPEND_BUFFER_SAFE(str, end, *fmt); + else + --fmt; + continue; + } + if (qualifier == 'l') + num = va_arg(args, uint64_t); + else if (qualifier == 'h') { + num = (uint16_t)va_arg(args, int); + if (flags & SIGN) + num = (int16_t)num; + } else if (flags & SIGN) + num = va_arg(args, int); + else + num = va_arg(args, uint32_t); + str = number(str, end, num, base, field_width, precision, flags); + } + + GUEST_ASSERT(str < end); + *str = '\0'; + return str - buf; +} + +int guest_snprintf(char *buf, int n, const char *fmt, ...) +{ + va_list va; + int len; + + va_start(va, fmt); + len = guest_vsnprintf(buf, n, fmt, va); + va_end(va); + + return len; +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 9741a7ff6380..7a8af1821f5d 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -312,6 +312,7 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, uint64_t extra_mem_pages) { + uint64_t page_size = vm_guest_mode_params[mode].page_size; uint64_t nr_pages; TEST_ASSERT(nr_runnable_vcpus, @@ -340,6 +341,9 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, */ nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2; + /* Account for the number of pages needed by ucall. */ + nr_pages += ucall_nr_pages_required(page_size); + return vm_adjust_num_guest_pages(mode, nr_pages); } @@ -994,7 +998,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, if (src_type == VM_MEM_SRC_ANONYMOUS_THP) alignment = max(backing_src_pagesz, alignment); - ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); + TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); /* Add enough memory to align up if necessary */ if (alignment > 1) diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 9a3476a2dfca..fe6d1004f018 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -10,10 +10,6 @@ #include "kvm_util.h" #include "processor.h" -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) -{ -} - struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4, @@ -40,13 +36,6 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, return ret; } -void ucall_arch_do_ucall(vm_vaddr_t uc) -{ - sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, - KVM_RISCV_SELFTESTS_SBI_UCALL, - uc, 0, 0, 0, 0, 0); -} - void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index a7f02dc372cf..cca98734653d 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -6,16 +6,6 @@ */ #include "kvm_util.h" -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) -{ -} - -void ucall_arch_do_ucall(vm_vaddr_t uc) -{ - /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ - asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); -} - void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index 50e0cf41a7dd..88cb6b84e6f3 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -634,7 +634,6 @@ static void node_reduce(struct sparsebit *s, struct node *nodep) tmp = node_prev(s, nodep); node_rm(s, nodep); - nodep = NULL; nodep = tmp; reduction_performed = true; diff --git a/tools/testing/selftests/kvm/lib/string_override.c b/tools/testing/selftests/kvm/lib/string_override.c index 632398adc229..5d1c87277c49 100644 --- a/tools/testing/selftests/kvm/lib/string_override.c +++ b/tools/testing/selftests/kvm/lib/string_override.c @@ -37,3 +37,12 @@ void *memset(void *s, int c, size_t count) *xs++ = c; return s; } + +size_t strnlen(const char *s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index b772193f6c18..3e36019eeb4a 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -5,6 +5,9 @@ * Copyright (C) 2020, Google LLC. */ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdarg.h> #include <assert.h> #include <ctype.h> #include <limits.h> @@ -377,3 +380,15 @@ int atoi_paranoid(const char *num_str) return num; } + +char *strdup_printf(const char *fmt, ...) +{ + va_list ap; + char *str; + + va_start(ap, fmt); + vasprintf(&str, fmt, ap); + va_end(ap); + + return str; +} diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index 2f0e2ea941cc..816a3fa109bf 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -11,6 +11,11 @@ struct ucall_header { struct ucall ucalls[KVM_MAX_VCPUS]; }; +int ucall_nr_pages_required(uint64_t page_size) +{ + return align_up(sizeof(struct ucall_header), page_size) / page_size; +} + /* * ucall_pool holds per-VM values (global data is duplicated by each VM), it * must not be accessed from host code. @@ -70,6 +75,45 @@ static void ucall_free(struct ucall *uc) clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use); } +void ucall_assert(uint64_t cmd, const char *exp, const char *file, + unsigned int line, const char *fmt, ...) +{ + struct ucall *uc; + va_list va; + + uc = ucall_alloc(); + uc->cmd = cmd; + + WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (uint64_t)(exp)); + WRITE_ONCE(uc->args[GUEST_FILE], (uint64_t)(file)); + WRITE_ONCE(uc->args[GUEST_LINE], line); + + va_start(va, fmt); + guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va); + va_end(va); + + ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + + ucall_free(uc); +} + +void ucall_fmt(uint64_t cmd, const char *fmt, ...) +{ + struct ucall *uc; + va_list va; + + uc = ucall_alloc(); + uc->cmd = cmd; + + va_start(va, fmt); + guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va); + va_end(va); + + ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + + ucall_free(uc); +} + void ucall(uint64_t cmd, int nargs, ...) { struct ucall *uc; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index d4a0b504b1e0..d8288374078e 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1074,11 +1074,6 @@ static bool kvm_fixup_exception(struct ex_regs *regs) return true; } -void kvm_exit_unexpected_vector(uint32_t value) -{ - ucall(UCALL_UNHANDLED, 1, value); -} - void route_exception(struct ex_regs *regs) { typedef void(*handler)(struct ex_regs *); @@ -1092,7 +1087,10 @@ void route_exception(struct ex_regs *regs) if (kvm_fixup_exception(regs)) return; - kvm_exit_unexpected_vector(regs->vector); + ucall_assert(UCALL_UNHANDLED, + "Unhandled exception in guest", __FILE__, __LINE__, + "Unhandled exception '0x%lx' at guest RIP '0x%lx'", + regs->vector, regs->rip); } void vm_init_descriptor_tables(struct kvm_vm *vm) @@ -1135,12 +1133,8 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { struct ucall uc; - if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) { - uint64_t vector = uc.args[0]; - - TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)", - vector); - } + if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) + REPORT_GUEST_ASSERT(uc); } const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index 4d41dc63cc9e..1265cecc7dd1 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -8,14 +8,38 @@ #define UCALL_PIO_PORT ((uint16_t)0x1000) -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) -{ -} - void ucall_arch_do_ucall(vm_vaddr_t uc) { - asm volatile("in %[port], %%al" - : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory"); + /* + * FIXME: Revert this hack (the entire commit that added it) once nVMX + * preserves L2 GPRs across a nested VM-Exit. If a ucall from L2, e.g. + * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be + * clobbered by L1. Save and restore non-volatile GPRs (clobbering RBP + * in particular is problematic) along with RDX and RDI (which are + * inputs), and clobber volatile GPRs. *sigh* + */ +#define HORRIFIC_L2_UCALL_CLOBBER_HACK \ + "rcx", "rsi", "r8", "r9", "r10", "r11" + + asm volatile("push %%rbp\n\t" + "push %%r15\n\t" + "push %%r14\n\t" + "push %%r13\n\t" + "push %%r12\n\t" + "push %%rbx\n\t" + "push %%rdx\n\t" + "push %%rdi\n\t" + "in %[port], %%al\n\t" + "pop %%rdi\n\t" + "pop %%rdx\n\t" + "pop %%rbx\n\t" + "pop %%r12\n\t" + "pop %%r13\n\t" + "pop %%r14\n\t" + "pop %%r15\n\t" + "pop %%rbp\n\t" + : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory", + HORRIFIC_L2_UCALL_CLOBBER_HACK); } void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c index feaf2be20ff2..6628dc4dda89 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/max_guest_memory_test.c @@ -55,7 +55,7 @@ static void rendezvous_with_boss(void) static void run_vcpu(struct kvm_vcpu *vcpu) { vcpu_run(vcpu); - ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); } static void *vcpu_worker(void *data) diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 4210cd21d159..20eb2e730800 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -157,7 +157,7 @@ static void *vcpu_worker(void *__data) goto done; break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_1(uc, "val = %lu"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; @@ -560,7 +560,7 @@ static void guest_code_test_memslot_rw(void) ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) { uint64_t val = *(uint64_t *)ptr; - GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val); + GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2); *(uint64_t *)ptr = 0; } diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c new file mode 100644 index 000000000000..d8ecacd03ecf --- /dev/null +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -0,0 +1,872 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check for KVM_GET_REG_LIST regressions. + * + * Copyright (c) 2023 Intel Corporation + * + */ +#include <stdio.h> +#include "kvm_util.h" +#include "test_util.h" +#include "processor.h" + +#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK) + +bool filter_reg(__u64 reg) +{ + /* + * Some ISA extensions are optional and not present on all host, + * but they can't be disabled through ISA_EXT registers when present. + * So, to make life easy, just filtering out these kind of registers. + */ + switch (reg & ~REG_MASK) { + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM: + return true; + default: + break; + } + + return false; +} + +bool check_reject_set(int err) +{ + return err == EINVAL; +} + +static inline bool vcpu_has_ext(struct kvm_vcpu *vcpu, int ext) +{ + int ret; + unsigned long value; + + ret = __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(ext), &value); + if (ret) { + printf("Failed to get ext %d", ext); + return false; + } + + return !!value; +} + +void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) +{ + struct vcpu_reg_sublist *s; + + /* + * Disable all extensions which were enabled by default + * if they were available in the risc-v host. + */ + for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) + __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(i), 0); + + for_each_sublist(c, s) { + if (!s->feature) + continue; + + /* Try to enable the desired extension */ + __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(s->feature), 1); + + /* Double check whether the desired extension was enabled */ + __TEST_REQUIRE(vcpu_has_ext(vcpu, s->feature), + "%s not available, skipping tests\n", s->name); + } +} + +static const char *config_id_to_str(__u64 id) +{ + /* reg_off is the offset into struct kvm_riscv_config */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CONFIG); + + switch (reg_off) { + case KVM_REG_RISCV_CONFIG_REG(isa): + return "KVM_REG_RISCV_CONFIG_REG(isa)"; + case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): + return "KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)"; + case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): + return "KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)"; + case KVM_REG_RISCV_CONFIG_REG(mvendorid): + return "KVM_REG_RISCV_CONFIG_REG(mvendorid)"; + case KVM_REG_RISCV_CONFIG_REG(marchid): + return "KVM_REG_RISCV_CONFIG_REG(marchid)"; + case KVM_REG_RISCV_CONFIG_REG(mimpid): + return "KVM_REG_RISCV_CONFIG_REG(mimpid)"; + case KVM_REG_RISCV_CONFIG_REG(satp_mode): + return "KVM_REG_RISCV_CONFIG_REG(satp_mode)"; + } + + /* + * Config regs would grow regularly with new pseudo reg added, so + * just show raw id to indicate a new pseudo config reg. + */ + return strdup_printf("KVM_REG_RISCV_CONFIG_REG(%lld) /* UNKNOWN */", reg_off); +} + +static const char *core_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct kvm_riscv_core */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CORE); + + switch (reg_off) { + case KVM_REG_RISCV_CORE_REG(regs.pc): + return "KVM_REG_RISCV_CORE_REG(regs.pc)"; + case KVM_REG_RISCV_CORE_REG(regs.ra): + return "KVM_REG_RISCV_CORE_REG(regs.ra)"; + case KVM_REG_RISCV_CORE_REG(regs.sp): + return "KVM_REG_RISCV_CORE_REG(regs.sp)"; + case KVM_REG_RISCV_CORE_REG(regs.gp): + return "KVM_REG_RISCV_CORE_REG(regs.gp)"; + case KVM_REG_RISCV_CORE_REG(regs.tp): + return "KVM_REG_RISCV_CORE_REG(regs.tp)"; + case KVM_REG_RISCV_CORE_REG(regs.t0) ... KVM_REG_RISCV_CORE_REG(regs.t2): + return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)", + reg_off - KVM_REG_RISCV_CORE_REG(regs.t0)); + case KVM_REG_RISCV_CORE_REG(regs.s0) ... KVM_REG_RISCV_CORE_REG(regs.s1): + return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)", + reg_off - KVM_REG_RISCV_CORE_REG(regs.s0)); + case KVM_REG_RISCV_CORE_REG(regs.a0) ... KVM_REG_RISCV_CORE_REG(regs.a7): + return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.a%lld)", + reg_off - KVM_REG_RISCV_CORE_REG(regs.a0)); + case KVM_REG_RISCV_CORE_REG(regs.s2) ... KVM_REG_RISCV_CORE_REG(regs.s11): + return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)", + reg_off - KVM_REG_RISCV_CORE_REG(regs.s2) + 2); + case KVM_REG_RISCV_CORE_REG(regs.t3) ... KVM_REG_RISCV_CORE_REG(regs.t6): + return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)", + reg_off - KVM_REG_RISCV_CORE_REG(regs.t3) + 3); + case KVM_REG_RISCV_CORE_REG(mode): + return "KVM_REG_RISCV_CORE_REG(mode)"; + } + + TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id); + return NULL; +} + +#define RISCV_CSR_GENERAL(csr) \ + "KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(" #csr ")" +#define RISCV_CSR_AIA(csr) \ + "KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_REG(" #csr ")" + +static const char *general_csr_id_to_str(__u64 reg_off) +{ + /* reg_off is the offset into struct kvm_riscv_csr */ + switch (reg_off) { + case KVM_REG_RISCV_CSR_REG(sstatus): + return RISCV_CSR_GENERAL(sstatus); + case KVM_REG_RISCV_CSR_REG(sie): + return RISCV_CSR_GENERAL(sie); + case KVM_REG_RISCV_CSR_REG(stvec): + return RISCV_CSR_GENERAL(stvec); + case KVM_REG_RISCV_CSR_REG(sscratch): + return RISCV_CSR_GENERAL(sscratch); + case KVM_REG_RISCV_CSR_REG(sepc): + return RISCV_CSR_GENERAL(sepc); + case KVM_REG_RISCV_CSR_REG(scause): + return RISCV_CSR_GENERAL(scause); + case KVM_REG_RISCV_CSR_REG(stval): + return RISCV_CSR_GENERAL(stval); + case KVM_REG_RISCV_CSR_REG(sip): + return RISCV_CSR_GENERAL(sip); + case KVM_REG_RISCV_CSR_REG(satp): + return RISCV_CSR_GENERAL(satp); + case KVM_REG_RISCV_CSR_REG(scounteren): + return RISCV_CSR_GENERAL(scounteren); + } + + TEST_FAIL("Unknown general csr reg: 0x%llx", reg_off); + return NULL; +} + +static const char *aia_csr_id_to_str(__u64 reg_off) +{ + /* reg_off is the offset into struct kvm_riscv_aia_csr */ + switch (reg_off) { + case KVM_REG_RISCV_CSR_AIA_REG(siselect): + return RISCV_CSR_AIA(siselect); + case KVM_REG_RISCV_CSR_AIA_REG(iprio1): + return RISCV_CSR_AIA(iprio1); + case KVM_REG_RISCV_CSR_AIA_REG(iprio2): + return RISCV_CSR_AIA(iprio2); + case KVM_REG_RISCV_CSR_AIA_REG(sieh): + return RISCV_CSR_AIA(sieh); + case KVM_REG_RISCV_CSR_AIA_REG(siph): + return RISCV_CSR_AIA(siph); + case KVM_REG_RISCV_CSR_AIA_REG(iprio1h): + return RISCV_CSR_AIA(iprio1h); + case KVM_REG_RISCV_CSR_AIA_REG(iprio2h): + return RISCV_CSR_AIA(iprio2h); + } + + TEST_FAIL("Unknown aia csr reg: 0x%llx", reg_off); + return NULL; +} + +static const char *csr_id_to_str(const char *prefix, __u64 id) +{ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CSR); + __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK; + + reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + switch (reg_subtype) { + case KVM_REG_RISCV_CSR_GENERAL: + return general_csr_id_to_str(reg_off); + case KVM_REG_RISCV_CSR_AIA: + return aia_csr_id_to_str(reg_off); + } + + TEST_FAIL("%s: Unknown csr subtype: 0x%llx", prefix, reg_subtype); + return NULL; +} + +static const char *timer_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct kvm_riscv_timer */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_TIMER); + + switch (reg_off) { + case KVM_REG_RISCV_TIMER_REG(frequency): + return "KVM_REG_RISCV_TIMER_REG(frequency)"; + case KVM_REG_RISCV_TIMER_REG(time): + return "KVM_REG_RISCV_TIMER_REG(time)"; + case KVM_REG_RISCV_TIMER_REG(compare): + return "KVM_REG_RISCV_TIMER_REG(compare)"; + case KVM_REG_RISCV_TIMER_REG(state): + return "KVM_REG_RISCV_TIMER_REG(state)"; + } + + TEST_FAIL("%s: Unknown timer reg id: 0x%llx", prefix, id); + return NULL; +} + +static const char *fp_f_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct __riscv_f_ext_state */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_F); + + switch (reg_off) { + case KVM_REG_RISCV_FP_F_REG(f[0]) ... + KVM_REG_RISCV_FP_F_REG(f[31]): + return strdup_printf("KVM_REG_RISCV_FP_F_REG(f[%lld])", reg_off); + case KVM_REG_RISCV_FP_F_REG(fcsr): + return "KVM_REG_RISCV_FP_F_REG(fcsr)"; + } + + TEST_FAIL("%s: Unknown fp_f reg id: 0x%llx", prefix, id); + return NULL; +} + +static const char *fp_d_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct __riscv_d_ext_state */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_D); + + switch (reg_off) { + case KVM_REG_RISCV_FP_D_REG(f[0]) ... + KVM_REG_RISCV_FP_D_REG(f[31]): + return strdup_printf("KVM_REG_RISCV_FP_D_REG(f[%lld])", reg_off); + case KVM_REG_RISCV_FP_D_REG(fcsr): + return "KVM_REG_RISCV_FP_D_REG(fcsr)"; + } + + TEST_FAIL("%s: Unknown fp_d reg id: 0x%llx", prefix, id); + return NULL; +} + +static const char *isa_ext_id_to_str(__u64 id) +{ + /* reg_off is the offset into unsigned long kvm_isa_ext_arr[] */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_ISA_EXT); + + static const char * const kvm_isa_ext_reg_name[] = { + "KVM_RISCV_ISA_EXT_A", + "KVM_RISCV_ISA_EXT_C", + "KVM_RISCV_ISA_EXT_D", + "KVM_RISCV_ISA_EXT_F", + "KVM_RISCV_ISA_EXT_H", + "KVM_RISCV_ISA_EXT_I", + "KVM_RISCV_ISA_EXT_M", + "KVM_RISCV_ISA_EXT_SVPBMT", + "KVM_RISCV_ISA_EXT_SSTC", + "KVM_RISCV_ISA_EXT_SVINVAL", + "KVM_RISCV_ISA_EXT_ZIHINTPAUSE", + "KVM_RISCV_ISA_EXT_ZICBOM", + "KVM_RISCV_ISA_EXT_ZICBOZ", + "KVM_RISCV_ISA_EXT_ZBB", + "KVM_RISCV_ISA_EXT_SSAIA", + "KVM_RISCV_ISA_EXT_V", + "KVM_RISCV_ISA_EXT_SVNAPOT", + "KVM_RISCV_ISA_EXT_ZBA", + "KVM_RISCV_ISA_EXT_ZBS", + "KVM_RISCV_ISA_EXT_ZICNTR", + "KVM_RISCV_ISA_EXT_ZICSR", + "KVM_RISCV_ISA_EXT_ZIFENCEI", + "KVM_RISCV_ISA_EXT_ZIHPM", + }; + + if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name)) { + /* + * isa_ext regs would grow regularly with new isa extension added, so + * just show "reg" to indicate a new extension. + */ + return strdup_printf("%lld /* UNKNOWN */", reg_off); + } + + return kvm_isa_ext_reg_name[reg_off]; +} + +static const char *sbi_ext_single_id_to_str(__u64 reg_off) +{ + /* reg_off is KVM_RISCV_SBI_EXT_ID */ + static const char * const kvm_sbi_ext_reg_name[] = { + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR", + }; + + if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name)) { + /* + * sbi_ext regs would grow regularly with new sbi extension added, so + * just show "reg" to indicate a new extension. + */ + return strdup_printf("KVM_REG_RISCV_SBI_SINGLE | %lld /* UNKNOWN */", reg_off); + } + + return kvm_sbi_ext_reg_name[reg_off]; +} + +static const char *sbi_ext_multi_id_to_str(__u64 reg_subtype, __u64 reg_off) +{ + if (reg_off > KVM_REG_RISCV_SBI_MULTI_REG_LAST) { + /* + * sbi_ext regs would grow regularly with new sbi extension added, so + * just show "reg" to indicate a new extension. + */ + return strdup_printf("%lld /* UNKNOWN */", reg_off); + } + + switch (reg_subtype) { + case KVM_REG_RISCV_SBI_MULTI_EN: + return strdup_printf("KVM_REG_RISCV_SBI_MULTI_EN | %lld", reg_off); + case KVM_REG_RISCV_SBI_MULTI_DIS: + return strdup_printf("KVM_REG_RISCV_SBI_MULTI_DIS | %lld", reg_off); + } + + return NULL; +} + +static const char *sbi_ext_id_to_str(const char *prefix, __u64 id) +{ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_EXT); + __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK; + + reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + switch (reg_subtype) { + case KVM_REG_RISCV_SBI_SINGLE: + return sbi_ext_single_id_to_str(reg_off); + case KVM_REG_RISCV_SBI_MULTI_EN: + case KVM_REG_RISCV_SBI_MULTI_DIS: + return sbi_ext_multi_id_to_str(reg_subtype, reg_off); + } + + TEST_FAIL("%s: Unknown sbi ext subtype: 0x%llx", prefix, reg_subtype); + return NULL; +} + +void print_reg(const char *prefix, __u64 id) +{ + const char *reg_size = NULL; + + TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_RISCV, + "%s: KVM_REG_RISCV missing in reg id: 0x%llx", prefix, id); + + switch (id & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U32: + reg_size = "KVM_REG_SIZE_U32"; + break; + case KVM_REG_SIZE_U64: + reg_size = "KVM_REG_SIZE_U64"; + break; + case KVM_REG_SIZE_U128: + reg_size = "KVM_REG_SIZE_U128"; + break; + default: + TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx", + prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); + } + + switch (id & KVM_REG_RISCV_TYPE_MASK) { + case KVM_REG_RISCV_CONFIG: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CONFIG | %s,\n", + reg_size, config_id_to_str(id)); + break; + case KVM_REG_RISCV_CORE: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CORE | %s,\n", + reg_size, core_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_CSR: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CSR | %s,\n", + reg_size, csr_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_TIMER: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_TIMER | %s,\n", + reg_size, timer_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_FP_F: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_F | %s,\n", + reg_size, fp_f_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_FP_D: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n", + reg_size, fp_d_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_ISA_EXT: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n", + reg_size, isa_ext_id_to_str(id)); + break; + case KVM_REG_RISCV_SBI_EXT: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_EXT | %s,\n", + reg_size, sbi_ext_id_to_str(prefix, id)); + break; + default: + TEST_FAIL("%s: Unexpected reg type: 0x%llx in reg id: 0x%llx", prefix, + (id & KVM_REG_RISCV_TYPE_MASK) >> KVM_REG_RISCV_TYPE_SHIFT, id); + } +} + +/* + * The current blessed list was primed with the output of kernel version + * v6.5-rc3 and then later updated with new registers. + */ +static __u64 base_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mvendorid), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(marchid), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mimpid), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(satp_mode), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.ra), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.sp), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.gp), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.tp), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t0), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t1), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t2), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s0), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s1), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a0), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a1), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a2), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a3), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a4), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a5), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a6), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a7), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s2), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s3), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s4), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s5), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s6), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s7), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s8), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s9), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s10), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s11), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t3), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t4), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t5), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t6), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(mode), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sstatus), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sie), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stvec), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sscratch), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sepc), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scause), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stval), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sip), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(satp), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scounteren), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(frequency), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_EN | 0, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_DIS | 0, +}; + +/* + * The skips_set list registers that should skip set test. + * - KVM_REG_RISCV_TIMER_REG(state): set would fail if it was not initialized properly. + */ +static __u64 base_skips_set[] = { + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state), +}; + +static __u64 h_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_H, +}; + +static __u64 zicbom_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM, +}; + +static __u64 zicboz_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ, +}; + +static __u64 svpbmt_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT, +}; + +static __u64 sstc_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC, +}; + +static __u64 svinval_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL, +}; + +static __u64 zihintpause_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE, +}; + +static __u64 zba_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA, +}; + +static __u64 zbb_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB, +}; + +static __u64 zbs_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS, +}; + +static __u64 zicntr_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR, +}; + +static __u64 zicsr_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR, +}; + +static __u64 zifencei_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI, +}; + +static __u64 zihpm_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM, +}; + +static __u64 aia_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA, +}; + +static __u64 fp_f_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[0]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[1]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[2]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[3]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[4]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[5]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[6]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[7]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[8]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[9]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[10]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[11]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[12]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[13]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[14]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[15]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[16]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[17]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[18]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[19]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[20]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[21]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[22]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[23]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[24]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[25]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[26]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[27]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[28]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[29]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[30]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[31]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(fcsr), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_F, +}; + +static __u64 fp_d_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[0]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[1]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[2]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[3]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[4]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[5]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[6]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[7]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[8]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[9]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[10]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[11]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[12]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[13]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[14]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[15]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[16]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[17]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[18]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[19]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[20]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[21]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[22]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[23]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[24]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[25]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[26]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[27]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[28]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[29]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[30]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[31]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(fcsr), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_D, +}; + +#define BASE_SUBLIST \ + {"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \ + .skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),} +#define H_REGS_SUBLIST \ + {"h", .feature = KVM_RISCV_ISA_EXT_H, .regs = h_regs, .regs_n = ARRAY_SIZE(h_regs),} +#define ZICBOM_REGS_SUBLIST \ + {"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),} +#define ZICBOZ_REGS_SUBLIST \ + {"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),} +#define SVPBMT_REGS_SUBLIST \ + {"svpbmt", .feature = KVM_RISCV_ISA_EXT_SVPBMT, .regs = svpbmt_regs, .regs_n = ARRAY_SIZE(svpbmt_regs),} +#define SSTC_REGS_SUBLIST \ + {"sstc", .feature = KVM_RISCV_ISA_EXT_SSTC, .regs = sstc_regs, .regs_n = ARRAY_SIZE(sstc_regs),} +#define SVINVAL_REGS_SUBLIST \ + {"svinval", .feature = KVM_RISCV_ISA_EXT_SVINVAL, .regs = svinval_regs, .regs_n = ARRAY_SIZE(svinval_regs),} +#define ZIHINTPAUSE_REGS_SUBLIST \ + {"zihintpause", .feature = KVM_RISCV_ISA_EXT_ZIHINTPAUSE, .regs = zihintpause_regs, .regs_n = ARRAY_SIZE(zihintpause_regs),} +#define ZBA_REGS_SUBLIST \ + {"zba", .feature = KVM_RISCV_ISA_EXT_ZBA, .regs = zba_regs, .regs_n = ARRAY_SIZE(zba_regs),} +#define ZBB_REGS_SUBLIST \ + {"zbb", .feature = KVM_RISCV_ISA_EXT_ZBB, .regs = zbb_regs, .regs_n = ARRAY_SIZE(zbb_regs),} +#define ZBS_REGS_SUBLIST \ + {"zbs", .feature = KVM_RISCV_ISA_EXT_ZBS, .regs = zbs_regs, .regs_n = ARRAY_SIZE(zbs_regs),} +#define ZICNTR_REGS_SUBLIST \ + {"zicntr", .feature = KVM_RISCV_ISA_EXT_ZICNTR, .regs = zicntr_regs, .regs_n = ARRAY_SIZE(zicntr_regs),} +#define ZICSR_REGS_SUBLIST \ + {"zicsr", .feature = KVM_RISCV_ISA_EXT_ZICSR, .regs = zicsr_regs, .regs_n = ARRAY_SIZE(zicsr_regs),} +#define ZIFENCEI_REGS_SUBLIST \ + {"zifencei", .feature = KVM_RISCV_ISA_EXT_ZIFENCEI, .regs = zifencei_regs, .regs_n = ARRAY_SIZE(zifencei_regs),} +#define ZIHPM_REGS_SUBLIST \ + {"zihpm", .feature = KVM_RISCV_ISA_EXT_ZIHPM, .regs = zihpm_regs, .regs_n = ARRAY_SIZE(zihpm_regs),} +#define AIA_REGS_SUBLIST \ + {"aia", .feature = KVM_RISCV_ISA_EXT_SSAIA, .regs = aia_regs, .regs_n = ARRAY_SIZE(aia_regs),} +#define FP_F_REGS_SUBLIST \ + {"fp_f", .feature = KVM_RISCV_ISA_EXT_F, .regs = fp_f_regs, \ + .regs_n = ARRAY_SIZE(fp_f_regs),} +#define FP_D_REGS_SUBLIST \ + {"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \ + .regs_n = ARRAY_SIZE(fp_d_regs),} + +static struct vcpu_reg_list h_config = { + .sublists = { + BASE_SUBLIST, + H_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zicbom_config = { + .sublists = { + BASE_SUBLIST, + ZICBOM_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zicboz_config = { + .sublists = { + BASE_SUBLIST, + ZICBOZ_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list svpbmt_config = { + .sublists = { + BASE_SUBLIST, + SVPBMT_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list sstc_config = { + .sublists = { + BASE_SUBLIST, + SSTC_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list svinval_config = { + .sublists = { + BASE_SUBLIST, + SVINVAL_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zihintpause_config = { + .sublists = { + BASE_SUBLIST, + ZIHINTPAUSE_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zba_config = { + .sublists = { + BASE_SUBLIST, + ZBA_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zbb_config = { + .sublists = { + BASE_SUBLIST, + ZBB_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zbs_config = { + .sublists = { + BASE_SUBLIST, + ZBS_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zicntr_config = { + .sublists = { + BASE_SUBLIST, + ZICNTR_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zicsr_config = { + .sublists = { + BASE_SUBLIST, + ZICSR_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zifencei_config = { + .sublists = { + BASE_SUBLIST, + ZIFENCEI_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zihpm_config = { + .sublists = { + BASE_SUBLIST, + ZIHPM_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list aia_config = { + .sublists = { + BASE_SUBLIST, + AIA_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list fp_f_config = { + .sublists = { + BASE_SUBLIST, + FP_F_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list fp_d_config = { + .sublists = { + BASE_SUBLIST, + FP_D_REGS_SUBLIST, + {0}, + }, +}; + +struct vcpu_reg_list *vcpu_configs[] = { + &h_config, + &zicbom_config, + &zicboz_config, + &svpbmt_config, + &sstc_config, + &svinval_config, + &zihintpause_config, + &zba_config, + &zbb_config, + &zbs_config, + &zicntr_config, + &zicsr_config, + &zifencei_config, + &zihpm_config, + &aia_config, + &fp_f_config, + &fp_d_config, +}; +int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c index 1d73e78e8fa7..c8e0a6495a63 100644 --- a/tools/testing/selftests/kvm/s390x/cmma_test.c +++ b/tools/testing/selftests/kvm/s390x/cmma_test.c @@ -237,8 +237,8 @@ static void test_get_cmma_basic(void) /* GET_CMMA_BITS without CMMA enabled should fail */ rc = vm_get_cmma_bits(vm, 0, &errno_out); - ASSERT_EQ(rc, -1); - ASSERT_EQ(errno_out, ENXIO); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno_out, ENXIO); enable_cmma(vm); vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); @@ -247,31 +247,31 @@ static void test_get_cmma_basic(void) /* GET_CMMA_BITS without migration mode and without peeking should fail */ rc = vm_get_cmma_bits(vm, 0, &errno_out); - ASSERT_EQ(rc, -1); - ASSERT_EQ(errno_out, EINVAL); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno_out, EINVAL); /* GET_CMMA_BITS without migration mode and with peeking should work */ rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out); - ASSERT_EQ(rc, 0); - ASSERT_EQ(errno_out, 0); + TEST_ASSERT_EQ(rc, 0); + TEST_ASSERT_EQ(errno_out, 0); enable_dirty_tracking(vm); enable_migration_mode(vm); /* GET_CMMA_BITS with invalid flags */ rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out); - ASSERT_EQ(rc, -1); - ASSERT_EQ(errno_out, EINVAL); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno_out, EINVAL); kvm_vm_free(vm); } static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu) { - ASSERT_EQ(vcpu->run->exit_reason, 13); - ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4); - ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300); - ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000); + TEST_ASSERT_EQ(vcpu->run->exit_reason, 13); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000); } static void test_migration_mode(void) @@ -283,8 +283,8 @@ static void test_migration_mode(void) /* enabling migration mode on a VM without memory should fail */ rc = __enable_migration_mode(vm); - ASSERT_EQ(rc, -1); - ASSERT_EQ(errno, EINVAL); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno, EINVAL); TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off"); errno = 0; @@ -304,8 +304,8 @@ static void test_migration_mode(void) /* migration mode when memslots have dirty tracking off should fail */ rc = __enable_migration_mode(vm); - ASSERT_EQ(rc, -1); - ASSERT_EQ(errno, EINVAL); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno, EINVAL); TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off"); errno = 0; @@ -314,7 +314,7 @@ static void test_migration_mode(void) /* enabling migration mode should work now */ rc = __enable_migration_mode(vm); - ASSERT_EQ(rc, 0); + TEST_ASSERT_EQ(rc, 0); TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); errno = 0; @@ -350,7 +350,7 @@ static void test_migration_mode(void) */ vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES); rc = __enable_migration_mode(vm); - ASSERT_EQ(rc, 0); + TEST_ASSERT_EQ(rc, 0); TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); errno = 0; @@ -394,9 +394,9 @@ static void assert_all_slots_cmma_dirty(struct kvm_vm *vm) }; memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); - ASSERT_EQ(args.count, MAIN_PAGE_COUNT); - ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT); - ASSERT_EQ(args.start_gfn, 0); + TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT); + TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT); + TEST_ASSERT_EQ(args.start_gfn, 0); /* ...and then - after a hole - the TEST_DATA memslot should follow */ args = (struct kvm_s390_cmma_log){ @@ -407,9 +407,9 @@ static void assert_all_slots_cmma_dirty(struct kvm_vm *vm) }; memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); - ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT); - ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN); - ASSERT_EQ(args.remaining, 0); + TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT); + TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN); + TEST_ASSERT_EQ(args.remaining, 0); /* ...and nothing else should be there */ args = (struct kvm_s390_cmma_log){ @@ -420,9 +420,9 @@ static void assert_all_slots_cmma_dirty(struct kvm_vm *vm) }; memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); - ASSERT_EQ(args.count, 0); - ASSERT_EQ(args.start_gfn, 0); - ASSERT_EQ(args.remaining, 0); + TEST_ASSERT_EQ(args.count, 0); + TEST_ASSERT_EQ(args.start_gfn, 0); + TEST_ASSERT_EQ(args.remaining, 0); } /** @@ -498,11 +498,11 @@ static void assert_cmma_dirty(u64 first_dirty_gfn, u64 dirty_gfn_count, const struct kvm_s390_cmma_log *res) { - ASSERT_EQ(res->start_gfn, first_dirty_gfn); - ASSERT_EQ(res->count, dirty_gfn_count); + TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn); + TEST_ASSERT_EQ(res->count, dirty_gfn_count); for (size_t i = 0; i < dirty_gfn_count; i++) - ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */ - ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */ + TEST_ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */ + TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */ } static void test_get_skip_holes(void) diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390x/debug_test.c new file mode 100644 index 000000000000..84313fb27529 --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/debug_test.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Test KVM debugging features. */ +#include "kvm_util.h" +#include "test_util.h" + +#include <linux/kvm.h> + +#define __LC_SVC_NEW_PSW 0x1c0 +#define __LC_PGM_NEW_PSW 0x1d0 +#define ICPT_INSTRUCTION 0x04 +#define IPA0_DIAG 0x8300 +#define PGM_SPECIFICATION 0x06 + +/* Common code for testing single-stepping interruptions. */ +extern char int_handler[]; +asm("int_handler:\n" + "j .\n"); + +static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code, + size_t new_psw_off, uint64_t *new_psw) +{ + struct kvm_guest_debug debug = {}; + struct kvm_regs regs; + struct kvm_vm *vm; + char *lowcore; + + vm = vm_create_with_one_vcpu(vcpu, guest_code); + lowcore = addr_gpa2hva(vm, 0); + new_psw[0] = (*vcpu)->run->psw_mask; + new_psw[1] = (uint64_t)int_handler; + memcpy(lowcore + new_psw_off, new_psw, 16); + vcpu_regs_get(*vcpu, ®s); + regs.gprs[2] = -1; + vcpu_regs_set(*vcpu, ®s); + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; + vcpu_guest_debug_set(*vcpu, &debug); + vcpu_run(*vcpu); + + return vm; +} + +static void test_step_int(void *guest_code, size_t new_psw_off) +{ + struct kvm_vcpu *vcpu; + uint64_t new_psw[2]; + struct kvm_vm *vm; + + vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); + TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]); + TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]); + kvm_vm_free(vm); +} + +/* Test single-stepping "boring" program interruptions. */ +extern char test_step_pgm_guest_code[]; +asm("test_step_pgm_guest_code:\n" + ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n" + "j .\n"); + +static void test_step_pgm(void) +{ + test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW); +} + +/* + * Test single-stepping program interruptions caused by DIAG. + * Userspace emulation must not interfere with single-stepping. + */ +extern char test_step_pgm_diag_guest_code[]; +asm("test_step_pgm_diag_guest_code:\n" + "diag %r0,%r0,0\n" + "j .\n"); + +static void test_step_pgm_diag(void) +{ + struct kvm_s390_irq irq = { + .type = KVM_S390_PROGRAM_INT, + .u.pgm.code = PGM_SPECIFICATION, + }; + struct kvm_vcpu *vcpu; + uint64_t new_psw[2]; + struct kvm_vm *vm; + + vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code, + __LC_PGM_NEW_PSW, new_psw); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INSTRUCTION); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG); + vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq); + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); + TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]); + TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]); + kvm_vm_free(vm); +} + +/* + * Test single-stepping program interruptions caused by ISKE. + * CPUSTAT_KSS handling must not interfere with single-stepping. + */ +extern char test_step_pgm_iske_guest_code[]; +asm("test_step_pgm_iske_guest_code:\n" + "iske %r2,%r2\n" + "j .\n"); + +static void test_step_pgm_iske(void) +{ + test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW); +} + +/* + * Test single-stepping program interruptions caused by LCTL. + * KVM emulation must not interfere with single-stepping. + */ +extern char test_step_pgm_lctl_guest_code[]; +asm("test_step_pgm_lctl_guest_code:\n" + "lctl %c0,%c0,1\n" + "j .\n"); + +static void test_step_pgm_lctl(void) +{ + test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW); +} + +/* Test single-stepping supervisor-call interruptions. */ +extern char test_step_svc_guest_code[]; +asm("test_step_svc_guest_code:\n" + "svc 0\n" + "j .\n"); + +static void test_step_svc(void) +{ + test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW); +} + +/* Run all tests above. */ +static struct testdef { + const char *name; + void (*test)(void); +} testlist[] = { + { "single-step pgm", test_step_pgm }, + { "single-step pgm caused by diag", test_step_pgm_diag }, + { "single-step pgm caused by iske", test_step_pgm_iske }, + { "single-step pgm caused by lctl", test_step_pgm_lctl }, + { "single-step svc", test_step_svc }, +}; + +int main(int argc, char *argv[]) +{ + int idx; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(testlist)); + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + testlist[idx].test(); + ksft_test_result_pass("%s\n", testlist[idx].name); + } + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 8e4b94d7b8dd..bb3ca9a5d731 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -4,7 +4,6 @@ * * Copyright (C) 2019, Red Hat, Inc. */ - #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -279,10 +278,10 @@ enum stage { vcpu_run(__vcpu); \ get_ucall(__vcpu, &uc); \ if (uc.cmd == UCALL_ABORT) { \ - REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu"); \ + REPORT_GUEST_ASSERT(uc); \ } \ - ASSERT_EQ(uc.cmd, UCALL_SYNC); \ - ASSERT_EQ(uc.args[1], __stage); \ + TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \ + TEST_ASSERT_EQ(uc.args[1], __stage); \ }) \ static void prepare_mem12(void) @@ -469,7 +468,7 @@ static __uint128_t cut_to_size(int size, __uint128_t val) case 16: return val; } - GUEST_ASSERT_1(false, "Invalid size"); + GUEST_FAIL("Invalid size = %u", size); return 0; } @@ -598,7 +597,7 @@ static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t return ret; } } - GUEST_ASSERT_1(false, "Invalid size"); + GUEST_FAIL("Invalid size = %u", size); return 0; } @@ -808,7 +807,7 @@ static void test_termination(void) HOST_SYNC(t.vcpu, STAGE_IDLED); MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168)); /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */ - ASSERT_EQ(teid & teid_mask, 0); + TEST_ASSERT_EQ(teid & teid_mask, 0); kvm_vm_free(t.kvm_vm); } diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c index a9a0b76e5fa4..c73f948c9b63 100644 --- a/tools/testing/selftests/kvm/s390x/tprot.c +++ b/tools/testing/selftests/kvm/s390x/tprot.c @@ -4,7 +4,6 @@ * * Copyright IBM Corp. 2021 */ - #include <sys/mman.h> #include "test_util.h" #include "kvm_util.h" @@ -156,7 +155,9 @@ static enum stage perform_next_stage(int *i, bool mapped_0) !mapped_0; if (!skip) { result = test_protection(tests[*i].addr, tests[*i].key); - GUEST_ASSERT_2(result == tests[*i].expected, *i, result); + __GUEST_ASSERT(result == tests[*i].expected, + "Wanted %u, got %u, for i = %u", + tests[*i].expected, result, *i); } } return stage; @@ -190,9 +191,9 @@ static void guest_code(void) vcpu_run(__vcpu); \ get_ucall(__vcpu, &uc); \ if (uc.cmd == UCALL_ABORT) \ - REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu"); \ - ASSERT_EQ(uc.cmd, UCALL_SYNC); \ - ASSERT_EQ(uc.args[1], __stage); \ + REPORT_GUEST_ASSERT(uc); \ + TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \ + TEST_ASSERT_EQ(uc.args[1], __stage); \ }) #define HOST_SYNC(vcpu, stage) \ diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index a849ce23ca97..b32960189f5f 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -88,7 +88,7 @@ static void *vcpu_worker(void *data) } if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT) - REPORT_GUEST_ASSERT_1(uc, "val = %lu"); + REPORT_GUEST_ASSERT(uc); return NULL; } @@ -156,19 +156,22 @@ static void guest_code_move_memory_region(void) * window where the memslot is invalid is usually quite small. */ val = guest_spin_on_val(0); - GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val); + __GUEST_ASSERT(val == 1 || val == MMIO_VAL, + "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val); /* Spin until the misaligning memory region move completes. */ val = guest_spin_on_val(MMIO_VAL); - GUEST_ASSERT_1(val == 1 || val == 0, val); + __GUEST_ASSERT(val == 1 || val == 0, + "Expected '0' or '1' (no MMIO), got '%llx'", val); /* Spin until the memory region starts to get re-aligned. */ val = guest_spin_on_val(0); - GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val); + __GUEST_ASSERT(val == 1 || val == MMIO_VAL, + "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val); /* Spin until the re-aligning memory region move completes. */ val = guest_spin_on_val(MMIO_VAL); - GUEST_ASSERT_1(val == 1, val); + GUEST_ASSERT_EQ(val, 1); GUEST_DONE(); } @@ -224,15 +227,15 @@ static void guest_code_delete_memory_region(void) /* Spin until the memory region is deleted. */ val = guest_spin_on_val(0); - GUEST_ASSERT_1(val == MMIO_VAL, val); + GUEST_ASSERT_EQ(val, MMIO_VAL); /* Spin until the memory region is recreated. */ val = guest_spin_on_val(MMIO_VAL); - GUEST_ASSERT_1(val == 0, val); + GUEST_ASSERT_EQ(val, 0); /* Spin until the memory region is deleted. */ val = guest_spin_on_val(0); - GUEST_ASSERT_1(val == MMIO_VAL, val); + GUEST_ASSERT_EQ(val, MMIO_VAL); asm("1:\n\t" ".pushsection .rodata\n\t" @@ -249,7 +252,7 @@ static void guest_code_delete_memory_region(void) "final_rip_end: .quad 1b\n\t" ".popsection"); - GUEST_ASSERT_1(0, 0); + GUEST_ASSERT(0); } static void test_delete_memory_region(void) diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index c87f38712073..171adfb2a6cb 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -31,8 +31,8 @@ static uint64_t guest_stolen_time[NR_VCPUS]; static void check_status(struct kvm_steal_time *st) { GUEST_ASSERT(!(READ_ONCE(st->version) & 1)); - GUEST_ASSERT(READ_ONCE(st->flags) == 0); - GUEST_ASSERT(READ_ONCE(st->preempted) == 0); + GUEST_ASSERT_EQ(READ_ONCE(st->flags), 0); + GUEST_ASSERT_EQ(READ_ONCE(st->preempted), 0); } static void guest_code(int cpu) @@ -40,7 +40,7 @@ static void guest_code(int cpu) struct kvm_steal_time *st = st_gva[cpu]; uint32_t version; - GUEST_ASSERT(rdmsr(MSR_KVM_STEAL_TIME) == ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED)); + GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED)); memset(st, 0, sizeof(*st)); GUEST_SYNC(0); @@ -122,8 +122,8 @@ static int64_t smccc(uint32_t func, uint64_t arg) static void check_status(struct st_time *st) { - GUEST_ASSERT(READ_ONCE(st->rev) == 0); - GUEST_ASSERT(READ_ONCE(st->attr) == 0); + GUEST_ASSERT_EQ(READ_ONCE(st->rev), 0); + GUEST_ASSERT_EQ(READ_ONCE(st->attr), 0); } static void guest_code(int cpu) @@ -132,15 +132,15 @@ static void guest_code(int cpu) int64_t status; status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES); - GUEST_ASSERT(status == 0); + GUEST_ASSERT_EQ(status, 0); status = smccc(PV_TIME_FEATURES, PV_TIME_FEATURES); - GUEST_ASSERT(status == 0); + GUEST_ASSERT_EQ(status, 0); status = smccc(PV_TIME_FEATURES, PV_TIME_ST); - GUEST_ASSERT(status == 0); + GUEST_ASSERT_EQ(status, 0); status = smccc(PV_TIME_ST, 0); - GUEST_ASSERT(status != -1); - GUEST_ASSERT(status == (ulong)st_gva[cpu]); + GUEST_ASSERT_NE(status, -1); + GUEST_ASSERT_EQ(status, (ulong)st_gva[cpu]); st = (struct st_time *)status; GUEST_SYNC(0); diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c index d3c3aa93f090..3b34d8156d1c 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -35,10 +35,10 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) guest_cpuid->entries[i].index, &eax, &ebx, &ecx, &edx); - GUEST_ASSERT(eax == guest_cpuid->entries[i].eax && - ebx == guest_cpuid->entries[i].ebx && - ecx == guest_cpuid->entries[i].ecx && - edx == guest_cpuid->entries[i].edx); + GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax); + GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx); + GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx); + GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx); } } @@ -51,7 +51,7 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid) GUEST_SYNC(2); - GUEST_ASSERT(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF) == 0x40000001); + GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001); GUEST_DONE(); } @@ -116,7 +116,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage) case UCALL_DONE: return; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); default: TEST_ASSERT(false, "Unexpected exit: %s", exit_reason_str(vcpu->run->exit_reason)); diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c index beb7e2c10211..634c6bfcd572 100644 --- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c +++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c @@ -72,7 +72,7 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) vcpu_run(vcpu); - ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); vcpu_last_completed_iteration[vcpu_idx] = current_iteration; @@ -179,12 +179,12 @@ static void run_test(enum vm_guest_mode mode, void *unused) * with that capability. */ if (dirty_log_manual_caps) { - ASSERT_EQ(stats_clear_pass[0].hugepages, 0); - ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages); - ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages); + TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0); + TEST_ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages); + TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages); } else { - ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0); - ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages); + TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0); + TEST_ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages); } /* @@ -192,9 +192,9 @@ static void run_test(enum vm_guest_mode mode, void *unused) * memory again, the page counts should be the same as they were * right after initial population of memory. */ - ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k); - ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m); - ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g); + TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k); + TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m); + TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g); } static void help(char *name) diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c index e334844d6e1d..6c2e5e0ceb1f 100644 --- a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c +++ b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c @@ -35,7 +35,7 @@ int main(int argc, char *argv[]) vcpu_run(vcpu); handle_flds_emulation_failure_exit(vcpu); vcpu_run(vcpu); - ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c index 73af44d2167f..e036db1f32b9 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c @@ -8,7 +8,6 @@ * Copyright 2022 Google LLC * Author: Vipin Sharma <vipinsh@google.com> */ - #include "kvm_util.h" #include "processor.h" #include "hyperv.h" @@ -84,7 +83,7 @@ int main(void) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "arg1 = %ld, arg2 = %ld"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: break; diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 78606de9385d..9f28aa276c4e 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -53,16 +53,21 @@ static void guest_msr(struct msr_data *msr) vector = rdmsr_safe(msr->idx, &msr_val); if (msr->fault_expected) - GUEST_ASSERT_3(vector == GP_VECTOR, msr->idx, vector, GP_VECTOR); + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP on %sMSR(0x%x), got vector '0x%x'", + msr->idx, msr->write ? "WR" : "RD", vector); else - GUEST_ASSERT_3(!vector, msr->idx, vector, 0); + __GUEST_ASSERT(!vector, + "Expected success on %sMSR(0x%x), got vector '0x%x'", + msr->idx, msr->write ? "WR" : "RD", vector); if (vector || is_write_only_msr(msr->idx)) goto done; if (msr->write) - GUEST_ASSERT_3(msr_val == msr->write_val, msr->idx, - msr_val, msr->write_val); + __GUEST_ASSERT(!vector, + "WRMSR(0x%x) to '0x%llx', RDMSR read '0x%llx'", + msr->idx, msr->write_val, msr_val); /* Invariant TSC bit appears when TSC invariant control MSR is written to */ if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) { @@ -82,7 +87,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) u64 res, input, output; uint8_t vector; - GUEST_ASSERT(hcall->control); + GUEST_ASSERT_NE(hcall->control, 0); wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); @@ -96,10 +101,14 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) vector = __hyperv_hypercall(hcall->control, input, output, &res); if (hcall->ud_expected) { - GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector); + __GUEST_ASSERT(vector == UD_VECTOR, + "Expected #UD for control '%u', got vector '0x%x'", + hcall->control, vector); } else { - GUEST_ASSERT_2(!vector, hcall->control, vector); - GUEST_ASSERT_2(res == hcall->expect, hcall->expect, res); + __GUEST_ASSERT(!vector, + "Expected no exception for control '%u', got vector '0x%x'", + hcall->control, vector); + GUEST_ASSERT_EQ(res, hcall->expect); } GUEST_DONE(); @@ -495,7 +504,7 @@ static void guest_test_msrs_access(void) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_3(uc, "MSR = %lx, arg1 = %lx, arg2 = %lx"); + REPORT_GUEST_ASSERT(uc); return; case UCALL_DONE: break; @@ -665,7 +674,7 @@ static void guest_test_hcalls_access(void) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "arg1 = %lx, arg2 = %lx"); + REPORT_GUEST_ASSERT(uc); return; case UCALL_DONE: break; diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c index f774a9e62858..9e2879af7c20 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -46,10 +46,10 @@ static void test_msr(struct msr_data *msr) PR_MSR(msr); vector = rdmsr_safe(msr->idx, &ignored); - GUEST_ASSERT_1(vector == GP_VECTOR, vector); + GUEST_ASSERT_EQ(vector, GP_VECTOR); vector = wrmsr_safe(msr->idx, 0); - GUEST_ASSERT_1(vector == GP_VECTOR, vector); + GUEST_ASSERT_EQ(vector, GP_VECTOR); } struct hcall_data { @@ -77,7 +77,7 @@ static void test_hcall(struct hcall_data *hc) PR_HCALL(hc); r = kvm_hypercall(hc->nr, 0, 0, 0, 0); - GUEST_ASSERT(r == -KVM_ENOSYS); + GUEST_ASSERT_EQ(r, -KVM_ENOSYS); } static void guest_main(void) @@ -125,7 +125,7 @@ static void enter_guest(struct kvm_vcpu *vcpu) pr_hcall(&uc); break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_1(uc, "vector = %lu"); + REPORT_GUEST_ASSERT(uc); return; case UCALL_DONE: return; diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c index 72812644d7f5..80aa3d8b18f8 100644 --- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c @@ -16,14 +16,25 @@ enum monitor_mwait_testcases { MWAIT_DISABLED = BIT(2), }; +/* + * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, in all + * other scenarios KVM should emulate them as nops. + */ +#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector) \ +do { \ + bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) && \ + ((testcase) & MWAIT_DISABLED); \ + \ + if (fault_wanted) \ + __GUEST_ASSERT((vector) == UD_VECTOR, \ + "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", vector); \ + else \ + __GUEST_ASSERT(!(vector), \ + "Expected success on " insn " for testcase '0x%x', got '0x%x'", vector); \ +} while (0) + static void guest_monitor_wait(int testcase) { - /* - * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, - * in all other scenarios KVM should emulate them as nops. - */ - bool fault_wanted = (testcase & MWAIT_QUIRK_DISABLED) && - (testcase & MWAIT_DISABLED); u8 vector; GUEST_SYNC(testcase); @@ -33,16 +44,10 @@ static void guest_monitor_wait(int testcase) * intercept checks, so the inputs for MONITOR and MWAIT must be valid. */ vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0)); - if (fault_wanted) - GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector); - else - GUEST_ASSERT_2(!vector, testcase, vector); + GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector); vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0)); - if (fault_wanted) - GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector); - else - GUEST_ASSERT_2(!vector, testcase, vector); + GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector); } static void guest_code(void) @@ -85,7 +90,7 @@ int main(int argc, char *argv[]) testcase = uc.args[1]; break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "testcase = %lx, vector = %ld"); + REPORT_GUEST_ASSERT(uc); goto done; case UCALL_DONE: goto done; diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c index 6502aa23c2f8..3670331adf21 100644 --- a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c +++ b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c @@ -180,9 +180,7 @@ static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector) "Expected L2 to ask for %d, L2 says it's done", vector); break; case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld (0x%lx != 0x%lx)", - (const char *)uc.args[0], __FILE__, uc.args[1], - uc.args[2], uc.args[3]); + REPORT_GUEST_ASSERT(uc); break; default: TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd); @@ -247,12 +245,12 @@ int main(int argc, char *argv[]) /* Verify the pending events comes back out the same as it went in. */ vcpu_events_get(vcpu, &events); - ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD, - KVM_VCPUEVENT_VALID_PAYLOAD); - ASSERT_EQ(events.exception.pending, true); - ASSERT_EQ(events.exception.nr, SS_VECTOR); - ASSERT_EQ(events.exception.has_error_code, true); - ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE); + TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD, + KVM_VCPUEVENT_VALID_PAYLOAD); + TEST_ASSERT_EQ(events.exception.pending, true); + TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR); + TEST_ASSERT_EQ(events.exception.has_error_code, true); + TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE); /* * Run for real with the pending #SS, L1 should get a VM-Exit due to diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 40507ed9fe8a..283cc55597a4 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -27,6 +27,15 @@ #define ARCH_PERFMON_BRANCHES_RETIRED 5 #define NUM_BRANCHES 42 +#define INTEL_PMC_IDX_FIXED 32 + +/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */ +#define MAX_FILTER_EVENTS 300 +#define MAX_TEST_EVENTS 10 + +#define PMU_EVENT_FILTER_INVALID_ACTION (KVM_PMU_EVENT_DENY + 1) +#define PMU_EVENT_FILTER_INVALID_FLAGS (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1) +#define PMU_EVENT_FILTER_INVALID_NEVENTS (MAX_FILTER_EVENTS + 1) /* * This is how the event selector and unit mask are stored in an AMD @@ -69,21 +78,33 @@ #define INST_RETIRED EVENT(0xc0, 0) +struct __kvm_pmu_event_filter { + __u32 action; + __u32 nevents; + __u32 fixed_counter_bitmap; + __u32 flags; + __u32 pad[4]; + __u64 events[MAX_FILTER_EVENTS]; +}; + /* * This event list comprises Intel's eight architectural events plus * AMD's "retired branch instructions" for Zen[123] (and possibly * other AMD CPUs). */ -static const uint64_t event_list[] = { - EVENT(0x3c, 0), - INST_RETIRED, - EVENT(0x3c, 1), - EVENT(0x2e, 0x4f), - EVENT(0x2e, 0x41), - EVENT(0xc4, 0), - EVENT(0xc5, 0), - EVENT(0xa4, 1), - AMD_ZEN_BR_RETIRED, +static const struct __kvm_pmu_event_filter base_event_filter = { + .nevents = ARRAY_SIZE(base_event_filter.events), + .events = { + EVENT(0x3c, 0), + INST_RETIRED, + EVENT(0x3c, 1), + EVENT(0x2e, 0x4f), + EVENT(0x2e, 0x41), + EVENT(0xc4, 0), + EVENT(0xc5, 0), + EVENT(0xa4, 1), + AMD_ZEN_BR_RETIRED, + }, }; struct { @@ -225,48 +246,11 @@ static bool sanity_check_pmu(struct kvm_vcpu *vcpu) return !r; } -static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents) -{ - struct kvm_pmu_event_filter *f; - int size = sizeof(*f) + nevents * sizeof(f->events[0]); - - f = malloc(size); - TEST_ASSERT(f, "Out of memory"); - memset(f, 0, size); - f->nevents = nevents; - return f; -} - - -static struct kvm_pmu_event_filter * -create_pmu_event_filter(const uint64_t event_list[], int nevents, - uint32_t action, uint32_t flags) -{ - struct kvm_pmu_event_filter *f; - int i; - - f = alloc_pmu_event_filter(nevents); - f->action = action; - f->flags = flags; - for (i = 0; i < nevents; i++) - f->events[i] = event_list[i]; - - return f; -} - -static struct kvm_pmu_event_filter *event_filter(uint32_t action) -{ - return create_pmu_event_filter(event_list, - ARRAY_SIZE(event_list), - action, 0); -} - /* * Remove the first occurrence of 'event' (if any) from the filter's * event list. */ -static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f, - uint64_t event) +static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event) { bool found = false; int i; @@ -279,7 +263,6 @@ static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f, } if (found) f->nevents--; - return f; } #define ASSERT_PMC_COUNTING_INSTRUCTIONS() \ @@ -315,66 +298,73 @@ static void test_without_filter(struct kvm_vcpu *vcpu) } static void test_with_filter(struct kvm_vcpu *vcpu, - struct kvm_pmu_event_filter *f) + struct __kvm_pmu_event_filter *__f) { + struct kvm_pmu_event_filter *f = (void *)__f; + vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); run_vcpu_and_sync_pmc_results(vcpu); } static void test_amd_deny_list(struct kvm_vcpu *vcpu) { - uint64_t event = EVENT(0x1C2, 0); - struct kvm_pmu_event_filter *f; + struct __kvm_pmu_event_filter f = { + .action = KVM_PMU_EVENT_DENY, + .nevents = 1, + .events = { + EVENT(0x1C2, 0), + }, + }; - f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY, 0); - test_with_filter(vcpu, f); - free(f); + test_with_filter(vcpu, &f); ASSERT_PMC_COUNTING_INSTRUCTIONS(); } static void test_member_deny_list(struct kvm_vcpu *vcpu) { - struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); + struct __kvm_pmu_event_filter f = base_event_filter; - test_with_filter(vcpu, f); - free(f); + f.action = KVM_PMU_EVENT_DENY; + test_with_filter(vcpu, &f); ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS(); } static void test_member_allow_list(struct kvm_vcpu *vcpu) { - struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); + struct __kvm_pmu_event_filter f = base_event_filter; - test_with_filter(vcpu, f); - free(f); + f.action = KVM_PMU_EVENT_ALLOW; + test_with_filter(vcpu, &f); ASSERT_PMC_COUNTING_INSTRUCTIONS(); } static void test_not_member_deny_list(struct kvm_vcpu *vcpu) { - struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); + struct __kvm_pmu_event_filter f = base_event_filter; - remove_event(f, INST_RETIRED); - remove_event(f, INTEL_BR_RETIRED); - remove_event(f, AMD_ZEN_BR_RETIRED); - test_with_filter(vcpu, f); - free(f); + f.action = KVM_PMU_EVENT_DENY; + + remove_event(&f, INST_RETIRED); + remove_event(&f, INTEL_BR_RETIRED); + remove_event(&f, AMD_ZEN_BR_RETIRED); + test_with_filter(vcpu, &f); ASSERT_PMC_COUNTING_INSTRUCTIONS(); } static void test_not_member_allow_list(struct kvm_vcpu *vcpu) { - struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); + struct __kvm_pmu_event_filter f = base_event_filter; + + f.action = KVM_PMU_EVENT_ALLOW; - remove_event(f, INST_RETIRED); - remove_event(f, INTEL_BR_RETIRED); - remove_event(f, AMD_ZEN_BR_RETIRED); - test_with_filter(vcpu, f); - free(f); + remove_event(&f, INST_RETIRED); + remove_event(&f, INTEL_BR_RETIRED); + remove_event(&f, AMD_ZEN_BR_RETIRED); + test_with_filter(vcpu, &f); ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS(); } @@ -569,19 +559,16 @@ static void run_masked_events_test(struct kvm_vcpu *vcpu, const uint64_t masked_events[], const int nmasked_events) { - struct kvm_pmu_event_filter *f; + struct __kvm_pmu_event_filter f = { + .nevents = nmasked_events, + .action = KVM_PMU_EVENT_ALLOW, + .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS, + }; - f = create_pmu_event_filter(masked_events, nmasked_events, - KVM_PMU_EVENT_ALLOW, - KVM_PMU_EVENT_FLAG_MASKED_EVENTS); - test_with_filter(vcpu, f); - free(f); + memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events); + test_with_filter(vcpu, &f); } -/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */ -#define MAX_FILTER_EVENTS 300 -#define MAX_TEST_EVENTS 10 - #define ALLOW_LOADS BIT(0) #define ALLOW_STORES BIT(1) #define ALLOW_LOADS_STORES BIT(2) @@ -753,21 +740,33 @@ static void test_masked_events(struct kvm_vcpu *vcpu) run_masked_events_tests(vcpu, events, nevents); } -static int run_filter_test(struct kvm_vcpu *vcpu, const uint64_t *events, - int nevents, uint32_t flags) +static int set_pmu_event_filter(struct kvm_vcpu *vcpu, + struct __kvm_pmu_event_filter *__f) { - struct kvm_pmu_event_filter *f; - int r; + struct kvm_pmu_event_filter *f = (void *)__f; - f = create_pmu_event_filter(events, nevents, KVM_PMU_EVENT_ALLOW, flags); - r = __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); - free(f); + return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); +} - return r; +static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event, + uint32_t flags, uint32_t action) +{ + struct __kvm_pmu_event_filter f = { + .nevents = 1, + .flags = flags, + .action = action, + .events = { + event, + }, + }; + + return set_pmu_event_filter(vcpu, &f); } static void test_filter_ioctl(struct kvm_vcpu *vcpu) { + uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + struct __kvm_pmu_event_filter f; uint64_t e = ~0ul; int r; @@ -775,15 +774,144 @@ static void test_filter_ioctl(struct kvm_vcpu *vcpu) * Unfortunately having invalid bits set in event data is expected to * pass when flags == 0 (bits other than eventsel+umask). */ - r = run_filter_test(vcpu, &e, 1, 0); + r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW); TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing"); - r = run_filter_test(vcpu, &e, 1, KVM_PMU_EVENT_FLAG_MASKED_EVENTS); + r = set_pmu_single_event_filter(vcpu, e, + KVM_PMU_EVENT_FLAG_MASKED_EVENTS, + KVM_PMU_EVENT_ALLOW); TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail"); e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf); - r = run_filter_test(vcpu, &e, 1, KVM_PMU_EVENT_FLAG_MASKED_EVENTS); + r = set_pmu_single_event_filter(vcpu, e, + KVM_PMU_EVENT_FLAG_MASKED_EVENTS, + KVM_PMU_EVENT_ALLOW); TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing"); + + f = base_event_filter; + f.action = PMU_EVENT_FILTER_INVALID_ACTION; + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(r, "Set invalid action is expected to fail"); + + f = base_event_filter; + f.flags = PMU_EVENT_FILTER_INVALID_FLAGS; + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(r, "Set invalid flags is expected to fail"); + + f = base_event_filter; + f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS; + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(r, "Exceeding the max number of filter events should fail"); + + f = base_event_filter; + f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0); + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed"); +} + +static void intel_run_fixed_counter_guest_code(uint8_t fixed_ctr_idx) +{ + for (;;) { + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + wrmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx, 0); + + /* Only OS_EN bit is enabled for fixed counter[idx]. */ + wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * fixed_ctr_idx)); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, + BIT_ULL(INTEL_PMC_IDX_FIXED + fixed_ctr_idx)); + __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx)); + } +} + +static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu, + uint32_t action, uint32_t bitmap) +{ + struct __kvm_pmu_event_filter f = { + .action = action, + .fixed_counter_bitmap = bitmap, + }; + set_pmu_event_filter(vcpu, &f); + + return run_vcpu_to_sync(vcpu); +} + +static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu, + uint32_t action, + uint32_t bitmap) +{ + struct __kvm_pmu_event_filter f = base_event_filter; + + f.action = action; + f.fixed_counter_bitmap = bitmap; + set_pmu_event_filter(vcpu, &f); + + return run_vcpu_to_sync(vcpu); +} + +static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx, + uint8_t nr_fixed_counters) +{ + unsigned int i; + uint32_t bitmap; + uint64_t count; + + TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8, + "Invalid nr_fixed_counters"); + + /* + * Check the fixed performance counter can count normally when KVM + * userspace doesn't set any pmu filter. + */ + count = run_vcpu_to_sync(vcpu); + TEST_ASSERT(count, "Unexpected count value: %ld\n", count); + + for (i = 0; i < BIT(nr_fixed_counters); i++) { + bitmap = BIT(i); + count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW, + bitmap); + TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx))); + + count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY, + bitmap); + TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx))); + + /* + * Check that fixed_counter_bitmap has higher priority than + * events[] when both are set. + */ + count = test_set_gp_and_fixed_event_filter(vcpu, + KVM_PMU_EVENT_ALLOW, + bitmap); + TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx))); + + count = test_set_gp_and_fixed_event_filter(vcpu, + KVM_PMU_EVENT_DENY, + bitmap); + TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx))); + } +} + +static void test_fixed_counter_bitmap(void) +{ + uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + uint8_t idx; + + /* + * Check that pmu_event_filter works as expected when it's applied to + * fixed performance counters. + */ + for (idx = 0; idx < nr_fixed_counters; idx++) { + vm = vm_create_with_one_vcpu(&vcpu, + intel_run_fixed_counter_guest_code); + vcpu_args_set(vcpu, 1, idx); + __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters); + kvm_vm_free(vm); + } } int main(int argc, char *argv[]) @@ -829,6 +957,7 @@ int main(int argc, char *argv[]) kvm_vm_free(vm); test_pmu_config_disable(guest_code); + test_fixed_counter_bitmap(); return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c index 4c416ebe7d66..cbc92a862ea9 100644 --- a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c +++ b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c @@ -57,7 +57,7 @@ int main(void) for (i = 0; i < KVM_MAX_VCPUS; i++) vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC); - ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0); + TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0); vcpuN = vcpus[KVM_MAX_VCPUS - 1]; for (t = time(NULL) + TIMEOUT; time(NULL) < t;) { @@ -65,8 +65,8 @@ int main(void) vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED); } - ASSERT_EQ(pthread_cancel(thread), 0); - ASSERT_EQ(pthread_join(thread, NULL), 0); + TEST_ASSERT_EQ(pthread_cancel(thread), 0); + TEST_ASSERT_EQ(pthread_join(thread, NULL), 0); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c index b25d7556b638..366cf18600bc 100644 --- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c +++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c @@ -20,7 +20,7 @@ static void guest_bsp_vcpu(void *arg) { GUEST_SYNC(1); - GUEST_ASSERT(get_bsp_flag() != 0); + GUEST_ASSERT_NE(get_bsp_flag(), 0); GUEST_DONE(); } @@ -29,7 +29,7 @@ static void guest_not_bsp_vcpu(void *arg) { GUEST_SYNC(1); - GUEST_ASSERT(get_bsp_flag() == 0); + GUEST_ASSERT_EQ(get_bsp_flag(), 0); GUEST_DONE(); } @@ -65,7 +65,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu) stage); break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); default: TEST_ASSERT(false, "Unexpected exit: %s", exit_reason_str(vcpu->run->exit_reason)); diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c index 4e2479716da6..7ee44496cf97 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c @@ -8,7 +8,6 @@ * Copyright (C) 2021, Red Hat, Inc. * */ - #include <stdatomic.h> #include <stdio.h> #include <unistd.h> @@ -34,13 +33,12 @@ static void l2_guest_code_int(void); static void guest_int_handler(struct ex_regs *regs) { int_fired++; - GUEST_ASSERT_2(regs->rip == (unsigned long)l2_guest_code_int, - regs->rip, (unsigned long)l2_guest_code_int); + GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int); } static void l2_guest_code_int(void) { - GUEST_ASSERT_1(int_fired == 1, int_fired); + GUEST_ASSERT_EQ(int_fired, 1); /* * Same as the vmmcall() function, but with a ud2 sneaked after the @@ -53,7 +51,7 @@ static void l2_guest_code_int(void) : "rbx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); - GUEST_ASSERT_1(bp_fired == 1, bp_fired); + GUEST_ASSERT_EQ(bp_fired, 1); hlt(); } @@ -66,9 +64,9 @@ static void guest_nmi_handler(struct ex_regs *regs) if (nmi_stage_get() == 1) { vmmcall(); - GUEST_ASSERT(false); + GUEST_FAIL("Unexpected resume after VMMCALL"); } else { - GUEST_ASSERT_1(nmi_stage_get() == 3, nmi_stage_get()); + GUEST_ASSERT_EQ(nmi_stage_get(), 3); GUEST_DONE(); } } @@ -104,7 +102,8 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i } run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_VMMCALL, + __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL, + "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%llx, info2 = '0x%llx'", vmcb->control.exit_code, vmcb->control.exit_info_1, vmcb->control.exit_info_2); @@ -112,7 +111,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i clgi(); x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI); - GUEST_ASSERT_1(nmi_stage_get() == 1, nmi_stage_get()); + GUEST_ASSERT_EQ(nmi_stage_get(), 1); nmi_stage_inc(); stgi(); @@ -133,7 +132,8 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i vmcb->control.next_rip = vmcb->save.rip + 2; run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_HLT, + __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT, + "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%llx, info2 = '0x%llx'", vmcb->control.exit_code, vmcb->control.exit_info_1, vmcb->control.exit_info_2); @@ -185,7 +185,7 @@ static void run_test(bool is_nmi) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_3(uc, "vals = 0x%lx 0x%lx 0x%lx"); + REPORT_GUEST_ASSERT(uc); break; /* NOT REACHED */ case UCALL_DONE: diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index 2da89fdc2471..00965ba33f73 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -15,6 +15,7 @@ #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> +#include <pthread.h> #include "test_util.h" #include "kvm_util.h" @@ -80,6 +81,133 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left, #define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS) #define INVALID_SYNC_FIELD 0x80000000 +/* + * Set an exception as pending *and* injected while KVM is processing events. + * KVM is supposed to ignore/drop pending exceptions if userspace is also + * requesting that an exception be injected. + */ +static void *race_events_inj_pen(void *arg) +{ + struct kvm_run *run = (struct kvm_run *)arg; + struct kvm_vcpu_events *events = &run->s.regs.events; + + WRITE_ONCE(events->exception.nr, UD_VECTOR); + + for (;;) { + WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS); + WRITE_ONCE(events->flags, 0); + WRITE_ONCE(events->exception.injected, 1); + WRITE_ONCE(events->exception.pending, 1); + + pthread_testcancel(); + } + + return NULL; +} + +/* + * Set an invalid exception vector while KVM is processing events. KVM is + * supposed to reject any vector >= 32, as well as NMIs (vector 2). + */ +static void *race_events_exc(void *arg) +{ + struct kvm_run *run = (struct kvm_run *)arg; + struct kvm_vcpu_events *events = &run->s.regs.events; + + for (;;) { + WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS); + WRITE_ONCE(events->flags, 0); + WRITE_ONCE(events->exception.nr, UD_VECTOR); + WRITE_ONCE(events->exception.pending, 1); + WRITE_ONCE(events->exception.nr, 255); + + pthread_testcancel(); + } + + return NULL; +} + +/* + * Toggle CR4.PAE while KVM is processing SREGS, EFER.LME=1 with CR4.PAE=0 is + * illegal, and KVM's MMU heavily relies on vCPU state being valid. + */ +static noinline void *race_sregs_cr4(void *arg) +{ + struct kvm_run *run = (struct kvm_run *)arg; + __u64 *cr4 = &run->s.regs.sregs.cr4; + __u64 pae_enabled = *cr4; + __u64 pae_disabled = *cr4 & ~X86_CR4_PAE; + + for (;;) { + WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS); + WRITE_ONCE(*cr4, pae_enabled); + asm volatile(".rept 512\n\t" + "nop\n\t" + ".endr"); + WRITE_ONCE(*cr4, pae_disabled); + + pthread_testcancel(); + } + + return NULL; +} + +static void race_sync_regs(void *racer) +{ + const time_t TIMEOUT = 2; /* seconds, roughly */ + struct kvm_x86_state *state; + struct kvm_translation tr; + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + pthread_t thread; + time_t t; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + run = vcpu->run; + + run->kvm_valid_regs = KVM_SYNC_X86_SREGS; + vcpu_run(vcpu); + run->kvm_valid_regs = 0; + + /* Save state *before* spawning the thread that mucks with vCPU state. */ + state = vcpu_save_state(vcpu); + + /* + * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE + * should already be set in guest state. + */ + TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) && + (run->s.regs.sregs.efer & EFER_LME), + "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d", + !!(run->s.regs.sregs.cr4 & X86_CR4_PAE), + !!(run->s.regs.sregs.efer & EFER_LME)); + + TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0); + + for (t = time(NULL) + TIMEOUT; time(NULL) < t;) { + /* + * Reload known good state if the vCPU triple faults, e.g. due + * to the unhandled #GPs being injected. VMX preserves state + * on shutdown, but SVM synthesizes an INIT as the VMCB state + * is architecturally undefined on triple fault. + */ + if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN) + vcpu_load_state(vcpu, state); + + if (racer == race_sregs_cr4) { + tr = (struct kvm_translation) { .linear_address = 0 }; + __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr); + } + } + + TEST_ASSERT_EQ(pthread_cancel(thread), 0); + TEST_ASSERT_EQ(pthread_join(thread, NULL), 0); + + kvm_x86_state_cleanup(state); + kvm_vm_free(vm); +} + int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; @@ -218,5 +346,9 @@ int main(int argc, char *argv[]) kvm_vm_free(vm); + race_sync_regs(race_sregs_cr4); + race_sync_regs(race_events_exc); + race_sync_regs(race_events_inj_pen); + return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c index c9f67702f657..12b0964f4f13 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c @@ -84,7 +84,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage) ksft_test_result_pass("stage %d passed\n", stage + 1); return; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); default: TEST_ASSERT(false, "Unexpected exit: %s", exit_reason_str(vcpu->run->exit_reason)); @@ -103,39 +103,39 @@ int main(void) vm = vm_create_with_one_vcpu(&vcpu, guest_code); val = 0; - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* Guest: writes to MSR_IA32_TSC affect both MSRs. */ run_vcpu(vcpu, 1); val = 1ull * GUEST_STEP; - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */ run_vcpu(vcpu, 2); val = 2ull * GUEST_STEP; - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* * Host: writes to MSR_IA32_TSC set the host-side offset * and therefore do not change MSR_IA32_TSC_ADJUST. */ vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); run_vcpu(vcpu, 3); /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456); /* Restore previous value. */ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the @@ -143,8 +143,8 @@ int main(void) */ run_vcpu(vcpu, 4); val = 3ull * GUEST_STEP; - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side @@ -152,8 +152,8 @@ int main(void) */ run_vcpu(vcpu, 5); val = 4ull * GUEST_STEP; - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c index 0cb51fa42773..255c50b0dc32 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c @@ -20,8 +20,8 @@ static void guest_ins_port80(uint8_t *buffer, unsigned int count) end = (unsigned long)buffer + 8192; asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory"); - GUEST_ASSERT_1(count == 0, count); - GUEST_ASSERT_2((unsigned long)buffer == end, buffer, end); + GUEST_ASSERT_EQ(count, 0); + GUEST_ASSERT_EQ((unsigned long)buffer, end); } static void guest_code(void) @@ -43,7 +43,9 @@ static void guest_code(void) memset(buffer, 0, sizeof(buffer)); guest_ins_port80(buffer, 8192); for (i = 0; i < 8192; i++) - GUEST_ASSERT_2(buffer[i] == 0xaa, i, buffer[i]); + __GUEST_ASSERT(buffer[i] == 0xaa, + "Expected '0xaa', got '0x%x' at buffer[%u]", + buffer[i], i); GUEST_DONE(); } @@ -91,7 +93,7 @@ int main(int argc, char *argv[]) case UCALL_DONE: break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "argN+1 = 0x%lx, argN+2 = 0x%lx"); + REPORT_GUEST_ASSERT(uc); default: TEST_FAIL("Unknown ucall %lu", uc.cmd); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c index be0bdb8c6f78..a9b827c69f32 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c @@ -50,7 +50,7 @@ static void set_timer(void) timer.it_value.tv_sec = 0; timer.it_value.tv_usec = 200; timer.it_interval = timer.it_value; - ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0); + TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0); } static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index 4c90f76930f9..ebbcb0a3f743 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -10,7 +10,6 @@ * and check it can be retrieved with KVM_GET_MSR, also test * the invalid LBR formats are rejected. */ - #define _GNU_SOURCE /* for program_invocation_short_name */ #include <sys/ioctl.h> @@ -52,23 +51,24 @@ static const union perf_capabilities format_caps = { .pebs_format = -1, }; +static void guest_test_perf_capabilities_gp(uint64_t val) +{ + uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val); + + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP for value '0x%llx', got vector '0x%x'", + val, vector); +} + static void guest_code(uint64_t current_val) { - uint8_t vector; int i; - vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, current_val); - GUEST_ASSERT_2(vector == GP_VECTOR, current_val, vector); - - vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, 0); - GUEST_ASSERT_2(vector == GP_VECTOR, 0, vector); + guest_test_perf_capabilities_gp(current_val); + guest_test_perf_capabilities_gp(0); - for (i = 0; i < 64; i++) { - vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, - current_val ^ BIT_ULL(i)); - GUEST_ASSERT_2(vector == GP_VECTOR, - current_val ^ BIT_ULL(i), vector); - } + for (i = 0; i < 64; i++) + guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i)); GUEST_DONE(); } @@ -95,7 +95,7 @@ static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "val = 0x%lx, vector = %lu"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: break; @@ -103,7 +103,8 @@ static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap) TEST_FAIL("Unexpected ucall: %lu", uc.cmd); } - ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), host_cap.capabilities); + TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), + host_cap.capabilities); vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index 396c13f42457..ab75b873a4ad 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -65,17 +65,17 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val) vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic); vcpu_run(vcpu); - ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); - ASSERT_EQ(uc.args[1], val); + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); + TEST_ASSERT_EQ(uc.args[1], val); vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) | (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32; if (!x->is_x2apic) { val &= (-1u | (0xffull << (32 + 24))); - ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); + TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); } else { - ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); + TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); } } diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c index 905bd5ae4431..77d04a7bdadd 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c @@ -4,7 +4,6 @@ * * Copyright (C) 2022, Google LLC. */ - #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -20,13 +19,14 @@ * Assert that architectural dependency rules are satisfied, e.g. that AVX is * supported if and only if SSE is supported. */ -#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \ -do { \ - uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \ - \ - GUEST_ASSERT_3((__supported & (xfeatures)) != (xfeatures) || \ - __supported == ((xfeatures) | (dependencies)), \ - __supported, (xfeatures), (dependencies)); \ +#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \ +do { \ + uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \ + \ + __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \ + __supported == ((xfeatures) | (dependencies)), \ + "supported = 0x%llx, xfeatures = 0x%llx, dependencies = 0x%llx", \ + __supported, (xfeatures), (dependencies)); \ } while (0) /* @@ -41,7 +41,8 @@ do { \ do { \ uint64_t __supported = (supported_xcr0) & (xfeatures); \ \ - GUEST_ASSERT_2(!__supported || __supported == (xfeatures), \ + __GUEST_ASSERT(!__supported || __supported == (xfeatures), \ + "supported = 0x%llx, xfeatures = 0x%llx", \ __supported, (xfeatures)); \ } while (0) @@ -79,14 +80,18 @@ static void guest_code(void) XFEATURE_MASK_XTILE); vector = xsetbv_safe(0, supported_xcr0); - GUEST_ASSERT_2(!vector, supported_xcr0, vector); + __GUEST_ASSERT(!vector, + "Expected success on XSETBV(0x%llx), got vector '0x%x'", + supported_xcr0, vector); for (i = 0; i < 64; i++) { if (supported_xcr0 & BIT_ULL(i)) continue; vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i)); - GUEST_ASSERT_3(vector == GP_VECTOR, supported_xcr0, vector, BIT_ULL(i)); + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP on XSETBV(0x%llx), supported XCR0 = %llx, got vector '0x%x'", + BIT_ULL(i), supported_xcr0, vector); } GUEST_DONE(); @@ -117,7 +122,7 @@ int main(int argc, char *argv[]) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_3(uc, "0x%lx 0x%lx 0x%lx"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c index c94cde3b523f..e149d0574961 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c @@ -108,16 +108,16 @@ int main(int argc, char *argv[]) vcpu_run(vcpu); if (run->exit_reason == KVM_EXIT_XEN) { - ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL); - ASSERT_EQ(run->xen.u.hcall.cpl, 0); - ASSERT_EQ(run->xen.u.hcall.longmode, 1); - ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE); - ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1)); - ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2)); - ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3)); - ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4)); - ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5)); - ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6)); + TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL); + TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0); + TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1); + TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE); + TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6)); run->xen.u.hcall.result = RETVALUE; continue; } diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index dba0e8ba002f..3df008677239 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -18,6 +18,7 @@ #include <sys/syscall.h> #include <sys/wait.h> #include <unistd.h> +#include <ctype.h> #include "common.h" @@ -43,7 +44,6 @@ */ static size_t mfd_def_size = MFD_DEF_SIZE; static const char *memfd_str = MEMFD_STR; -static pid_t spawn_newpid_thread(unsigned int flags, int (*fn)(void *)); static int newpid_thread_fn2(void *arg); static void join_newpid_thread(pid_t pid); @@ -96,12 +96,12 @@ static void sysctl_assert_write(const char *val) int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC); if (fd < 0) { - printf("open sysctl failed\n"); + printf("open sysctl failed: %m\n"); abort(); } if (write(fd, val, strlen(val)) < 0) { - printf("write sysctl failed\n"); + printf("write sysctl %s failed: %m\n", val); abort(); } } @@ -111,7 +111,7 @@ static void sysctl_fail_write(const char *val) int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC); if (fd < 0) { - printf("open sysctl failed\n"); + printf("open sysctl failed: %m\n"); abort(); } @@ -122,6 +122,33 @@ static void sysctl_fail_write(const char *val) } } +static void sysctl_assert_equal(const char *val) +{ + char *p, buf[128] = {}; + int fd = open("/proc/sys/vm/memfd_noexec", O_RDONLY | O_CLOEXEC); + + if (fd < 0) { + printf("open sysctl failed: %m\n"); + abort(); + } + + if (read(fd, buf, sizeof(buf)) < 0) { + printf("read sysctl failed: %m\n"); + abort(); + } + + /* Strip trailing whitespace. */ + p = buf; + while (!isspace(*p)) + p++; + *p = '\0'; + + if (strcmp(buf, val) != 0) { + printf("unexpected sysctl value: expected %s, got %s\n", val, buf); + abort(); + } +} + static int mfd_assert_reopen_fd(int fd_in) { int fd; @@ -736,7 +763,7 @@ static int idle_thread_fn(void *arg) return 0; } -static pid_t spawn_idle_thread(unsigned int flags) +static pid_t spawn_thread(unsigned int flags, int (*fn)(void *), void *arg) { uint8_t *stack; pid_t pid; @@ -747,10 +774,7 @@ static pid_t spawn_idle_thread(unsigned int flags) abort(); } - pid = clone(idle_thread_fn, - stack + STACK_SIZE, - SIGCHLD | flags, - NULL); + pid = clone(fn, stack + STACK_SIZE, SIGCHLD | flags, arg); if (pid < 0) { printf("clone() failed: %m\n"); abort(); @@ -759,6 +783,33 @@ static pid_t spawn_idle_thread(unsigned int flags) return pid; } +static void join_thread(pid_t pid) +{ + int wstatus; + + if (waitpid(pid, &wstatus, 0) < 0) { + printf("newpid thread: waitpid() failed: %m\n"); + abort(); + } + + if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) { + printf("newpid thread: exited with non-zero error code %d\n", + WEXITSTATUS(wstatus)); + abort(); + } + + if (WIFSIGNALED(wstatus)) { + printf("newpid thread: killed by signal %d\n", + WTERMSIG(wstatus)); + abort(); + } +} + +static pid_t spawn_idle_thread(unsigned int flags) +{ + return spawn_thread(flags, idle_thread_fn, NULL); +} + static void join_idle_thread(pid_t pid) { kill(pid, SIGTERM); @@ -1111,109 +1162,260 @@ static void test_noexec_seal(void) close(fd); } -static void test_sysctl_child(void) +static void test_sysctl_sysctl0(void) { int fd; - int pid; - printf("%s sysctl 0\n", memfd_str); - sysctl_assert_write("0"); - fd = mfd_assert_new("kern_memfd_sysctl_0", + sysctl_assert_equal("0"); + + fd = mfd_assert_new("kern_memfd_sysctl_0_dfl", mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); - mfd_assert_mode(fd, 0777); mfd_assert_has_seals(fd, 0); mfd_assert_chmod(fd, 0644); close(fd); +} - printf("%s sysctl 1\n", memfd_str); - sysctl_assert_write("1"); - fd = mfd_assert_new("kern_memfd_sysctl_1", +static void test_sysctl_set_sysctl0(void) +{ + sysctl_assert_write("0"); + test_sysctl_sysctl0(); +} + +static void test_sysctl_sysctl1(void) +{ + int fd; + + sysctl_assert_equal("1"); + + fd = mfd_assert_new("kern_memfd_sysctl_1_dfl", mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); + mfd_assert_mode(fd, 0666); + mfd_assert_has_seals(fd, F_SEAL_EXEC); + mfd_fail_chmod(fd, 0777); + close(fd); - printf("%s child ns\n", memfd_str); - pid = spawn_newpid_thread(CLONE_NEWPID, newpid_thread_fn2); - join_newpid_thread(pid); + fd = mfd_assert_new("kern_memfd_sysctl_1_exec", + mfd_def_size, + MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING); + mfd_assert_mode(fd, 0777); + mfd_assert_has_seals(fd, 0); + mfd_assert_chmod(fd, 0644); + close(fd); + fd = mfd_assert_new("kern_memfd_sysctl_1_noexec", + mfd_def_size, + MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING); mfd_assert_mode(fd, 0666); mfd_assert_has_seals(fd, F_SEAL_EXEC); mfd_fail_chmod(fd, 0777); - sysctl_fail_write("0"); close(fd); - - printf("%s sysctl 2\n", memfd_str); - sysctl_assert_write("2"); - mfd_fail_new("kern_memfd_sysctl_2", - MFD_CLOEXEC | MFD_ALLOW_SEALING); - sysctl_fail_write("0"); - sysctl_fail_write("1"); } -static int newpid_thread_fn(void *arg) +static void test_sysctl_set_sysctl1(void) { - test_sysctl_child(); - return 0; + sysctl_assert_write("1"); + test_sysctl_sysctl1(); } -static void test_sysctl_child2(void) +static void test_sysctl_sysctl2(void) { int fd; - sysctl_fail_write("0"); - fd = mfd_assert_new("kern_memfd_sysctl_1", + sysctl_assert_equal("2"); + + fd = mfd_assert_new("kern_memfd_sysctl_2_dfl", mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); + mfd_assert_mode(fd, 0666); + mfd_assert_has_seals(fd, F_SEAL_EXEC); + mfd_fail_chmod(fd, 0777); + close(fd); + + mfd_fail_new("kern_memfd_sysctl_2_exec", + MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING); + fd = mfd_assert_new("kern_memfd_sysctl_2_noexec", + mfd_def_size, + MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING); mfd_assert_mode(fd, 0666); mfd_assert_has_seals(fd, F_SEAL_EXEC); mfd_fail_chmod(fd, 0777); close(fd); } -static int newpid_thread_fn2(void *arg) +static void test_sysctl_set_sysctl2(void) +{ + sysctl_assert_write("2"); + test_sysctl_sysctl2(); +} + +static int sysctl_simple_child(void *arg) +{ + int fd; + int pid; + + printf("%s sysctl 0\n", memfd_str); + test_sysctl_set_sysctl0(); + + printf("%s sysctl 1\n", memfd_str); + test_sysctl_set_sysctl1(); + + printf("%s sysctl 0\n", memfd_str); + test_sysctl_set_sysctl0(); + + printf("%s sysctl 2\n", memfd_str); + test_sysctl_set_sysctl2(); + + printf("%s sysctl 1\n", memfd_str); + test_sysctl_set_sysctl1(); + + printf("%s sysctl 0\n", memfd_str); + test_sysctl_set_sysctl0(); + + return 0; +} + +/* + * Test sysctl + * A very basic test to make sure the core sysctl semantics work. + */ +static void test_sysctl_simple(void) +{ + int pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL); + + join_thread(pid); +} + +static int sysctl_nested(void *arg) { - test_sysctl_child2(); + void (*fn)(void) = arg; + + fn(); return 0; } -static pid_t spawn_newpid_thread(unsigned int flags, int (*fn)(void *)) + +static int sysctl_nested_wait(void *arg) { - uint8_t *stack; - pid_t pid; + /* Wait for a SIGCONT. */ + kill(getpid(), SIGSTOP); + return sysctl_nested(arg); +} - stack = malloc(STACK_SIZE); - if (!stack) { - printf("malloc(STACK_SIZE) failed: %m\n"); - abort(); - } +static void test_sysctl_sysctl1_failset(void) +{ + sysctl_fail_write("0"); + test_sysctl_sysctl1(); +} - pid = clone(fn, - stack + STACK_SIZE, - SIGCHLD | flags, - NULL); - if (pid < 0) { - printf("clone() failed: %m\n"); - abort(); - } +static void test_sysctl_sysctl2_failset(void) +{ + sysctl_fail_write("1"); + test_sysctl_sysctl2(); - return pid; + sysctl_fail_write("0"); + test_sysctl_sysctl2(); } -static void join_newpid_thread(pid_t pid) +static int sysctl_nested_child(void *arg) { - waitpid(pid, NULL, 0); + int fd; + int pid; + + printf("%s nested sysctl 0\n", memfd_str); + sysctl_assert_write("0"); + /* A further nested pidns works the same. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL); + join_thread(pid); + + printf("%s nested sysctl 1\n", memfd_str); + sysctl_assert_write("1"); + /* Child inherits our setting. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl1); + join_thread(pid); + /* Child cannot raise the setting. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_nested, + test_sysctl_sysctl1_failset); + join_thread(pid); + /* Child can lower the setting. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_nested, + test_sysctl_set_sysctl2); + join_thread(pid); + /* Child lowering the setting has no effect on our setting. */ + test_sysctl_sysctl1(); + + printf("%s nested sysctl 2\n", memfd_str); + sysctl_assert_write("2"); + /* Child inherits our setting. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl2); + join_thread(pid); + /* Child cannot raise the setting. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_nested, + test_sysctl_sysctl2_failset); + join_thread(pid); + + /* Verify that the rules are actually inherited after fork. */ + printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str); + sysctl_assert_write("0"); + + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, + test_sysctl_sysctl1_failset); + sysctl_assert_write("1"); + kill(pid, SIGCONT); + join_thread(pid); + + printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str); + sysctl_assert_write("0"); + + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, + test_sysctl_sysctl2_failset); + sysctl_assert_write("2"); + kill(pid, SIGCONT); + join_thread(pid); + + /* + * Verify that the current effective setting is saved on fork, meaning + * that the parent lowering the sysctl doesn't affect already-forked + * children. + */ + printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str); + sysctl_assert_write("2"); + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, + test_sysctl_sysctl2); + sysctl_assert_write("1"); + kill(pid, SIGCONT); + join_thread(pid); + + printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str); + sysctl_assert_write("2"); + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, + test_sysctl_sysctl2); + sysctl_assert_write("0"); + kill(pid, SIGCONT); + join_thread(pid); + + printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str); + sysctl_assert_write("1"); + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, + test_sysctl_sysctl1); + sysctl_assert_write("0"); + kill(pid, SIGCONT); + join_thread(pid); + + return 0; } /* - * Test sysctl - * A very basic sealing test to see whether setting/retrieving seals works. + * Test sysctl with nested pid namespaces + * Make sure that the sysctl nesting semantics work correctly. */ -static void test_sysctl(void) +static void test_sysctl_nested(void) { - int pid = spawn_newpid_thread(CLONE_NEWPID, newpid_thread_fn); + int pid = spawn_thread(CLONE_NEWPID, sysctl_nested_child, NULL); - join_newpid_thread(pid); + join_thread(pid); } /* @@ -1399,6 +1601,9 @@ int main(int argc, char **argv) test_seal_grow(); test_seal_resize(); + test_sysctl_simple(); + test_sysctl_nested(); + test_share_dup("SHARE-DUP", ""); test_share_mmap("SHARE-MMAP", ""); test_share_open("SHARE-OPEN", ""); @@ -1413,8 +1618,6 @@ int main(int argc, char **argv) test_share_fork("SHARE-FORK", SHARED_FT_STR); join_idle_thread(pid); - test_sysctl(); - printf("memfd: DONE\n"); return 0; diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 7e2a982383c0..cdc9ce4426b9 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -5,6 +5,7 @@ hugepage-mremap hugepage-shm hugepage-vmemmap hugetlb-madvise +hugetlb-read-hwpoison khugepaged map_hugetlb map_populate diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 66d7c07dc177..6a9fc5693145 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -35,39 +35,43 @@ MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) LDLIBS = -lrt -lpthread -TEST_GEN_PROGS = cow -TEST_GEN_PROGS += compaction_test -TEST_GEN_PROGS += gup_longterm -TEST_GEN_PROGS += gup_test -TEST_GEN_PROGS += hmm-tests -TEST_GEN_PROGS += hugetlb-madvise -TEST_GEN_PROGS += hugepage-mmap -TEST_GEN_PROGS += hugepage-mremap -TEST_GEN_PROGS += hugepage-shm -TEST_GEN_PROGS += hugepage-vmemmap -TEST_GEN_PROGS += khugepaged -TEST_GEN_PROGS += madv_populate -TEST_GEN_PROGS += map_fixed_noreplace -TEST_GEN_PROGS += map_hugetlb -TEST_GEN_PROGS += map_populate -TEST_GEN_PROGS += memfd_secret -TEST_GEN_PROGS += migration -TEST_GEN_PROGS += mkdirty -TEST_GEN_PROGS += mlock-random-test -TEST_GEN_PROGS += mlock2-tests -TEST_GEN_PROGS += mrelease_test -TEST_GEN_PROGS += mremap_dontunmap -TEST_GEN_PROGS += mremap_test -TEST_GEN_PROGS += on-fault-limit -TEST_GEN_PROGS += thuge-gen -TEST_GEN_PROGS += transhuge-stress -TEST_GEN_PROGS += uffd-stress -TEST_GEN_PROGS += uffd-unit-tests +TEST_GEN_FILES = cow +TEST_GEN_FILES += compaction_test +TEST_GEN_FILES += gup_longterm +TEST_GEN_FILES += gup_test +TEST_GEN_FILES += hmm-tests +TEST_GEN_FILES += hugetlb-madvise +TEST_GEN_FILES += hugetlb-read-hwpoison +TEST_GEN_FILES += hugepage-mmap +TEST_GEN_FILES += hugepage-mremap +TEST_GEN_FILES += hugepage-shm +TEST_GEN_FILES += hugepage-vmemmap +TEST_GEN_FILES += khugepaged +TEST_GEN_FILES += madv_populate +TEST_GEN_FILES += map_fixed_noreplace +TEST_GEN_FILES += map_hugetlb +TEST_GEN_FILES += map_populate +TEST_GEN_FILES += memfd_secret +TEST_GEN_FILES += migration +TEST_GEN_FILES += mkdirty +TEST_GEN_FILES += mlock-random-test +TEST_GEN_FILES += mlock2-tests +TEST_GEN_FILES += mrelease_test +TEST_GEN_FILES += mremap_dontunmap +TEST_GEN_FILES += mremap_test +TEST_GEN_FILES += on-fault-limit +TEST_GEN_FILES += thuge-gen +TEST_GEN_FILES += transhuge-stress +TEST_GEN_FILES += uffd-stress +TEST_GEN_FILES += uffd-unit-tests +TEST_GEN_FILES += split_huge_page_test +TEST_GEN_FILES += ksm_tests +TEST_GEN_FILES += ksm_functional_tests +TEST_GEN_FILES += mdwe_test + +ifneq ($(ARCH),arm64) TEST_GEN_PROGS += soft-dirty -TEST_GEN_PROGS += split_huge_page_test -TEST_GEN_PROGS += ksm_tests -TEST_GEN_PROGS += ksm_functional_tests -TEST_GEN_PROGS += mdwe_test +endif ifeq ($(ARCH),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32) @@ -83,24 +87,24 @@ CFLAGS += -no-pie endif ifeq ($(CAN_BUILD_I386),1) -TEST_GEN_PROGS += $(BINARIES_32) +TEST_GEN_FILES += $(BINARIES_32) endif ifeq ($(CAN_BUILD_X86_64),1) -TEST_GEN_PROGS += $(BINARIES_64) +TEST_GEN_FILES += $(BINARIES_64) endif else ifneq (,$(findstring $(ARCH),ppc64)) -TEST_GEN_PROGS += protection_keys +TEST_GEN_FILES += protection_keys endif endif ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sparc64 x86_64)) -TEST_GEN_PROGS += va_high_addr_switch -TEST_GEN_PROGS += virtual_address_range -TEST_GEN_PROGS += write_to_hugetlbfs +TEST_GEN_FILES += va_high_addr_switch +TEST_GEN_FILES += virtual_address_range +TEST_GEN_FILES += write_to_hugetlbfs endif TEST_PROGS := run_vmtests.sh @@ -112,6 +116,7 @@ TEST_FILES += va_high_addr_switch.sh include ../lib.mk $(TEST_GEN_PROGS): vm_util.c +$(TEST_GEN_FILES): vm_util.c $(OUTPUT)/uffd-stress: uffd-common.c $(OUTPUT)/uffd-unit-tests: uffd-common.c diff --git a/tools/testing/selftests/mm/hugetlb-read-hwpoison.c b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c new file mode 100644 index 000000000000..ba6cc6f9cabc --- /dev/null +++ b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include <linux/magic.h> +#include <sys/mman.h> +#include <sys/statfs.h> +#include <errno.h> +#include <stdbool.h> + +#include "../kselftest.h" + +#define PREFIX " ... " +#define ERROR_PREFIX " !!! " + +#define MAX_WRITE_READ_CHUNK_SIZE (getpagesize() * 16) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +enum test_status { + TEST_PASSED = 0, + TEST_FAILED = 1, + TEST_SKIPPED = 2, +}; + +static char *status_to_str(enum test_status status) +{ + switch (status) { + case TEST_PASSED: + return "TEST_PASSED"; + case TEST_FAILED: + return "TEST_FAILED"; + case TEST_SKIPPED: + return "TEST_SKIPPED"; + default: + return "TEST_???"; + } +} + +static int setup_filemap(char *filemap, size_t len, size_t wr_chunk_size) +{ + char iter = 0; + + for (size_t offset = 0; offset < len; + offset += wr_chunk_size) { + iter++; + memset(filemap + offset, iter, wr_chunk_size); + } + + return 0; +} + +static bool verify_chunk(char *buf, size_t len, char val) +{ + size_t i; + + for (i = 0; i < len; ++i) { + if (buf[i] != val) { + printf(PREFIX ERROR_PREFIX "check fail: buf[%lu] = %u != %u\n", + i, buf[i], val); + return false; + } + } + + return true; +} + +static bool seek_read_hugepage_filemap(int fd, size_t len, size_t wr_chunk_size, + off_t offset, size_t expected) +{ + char buf[MAX_WRITE_READ_CHUNK_SIZE]; + ssize_t ret_count = 0; + ssize_t total_ret_count = 0; + char val = offset / wr_chunk_size + offset % wr_chunk_size; + + printf(PREFIX PREFIX "init val=%u with offset=0x%lx\n", val, offset); + printf(PREFIX PREFIX "expect to read 0x%lx bytes of data in total\n", + expected); + if (lseek(fd, offset, SEEK_SET) < 0) { + perror(PREFIX ERROR_PREFIX "seek failed"); + return false; + } + + while (offset + total_ret_count < len) { + ret_count = read(fd, buf, wr_chunk_size); + if (ret_count == 0) { + printf(PREFIX PREFIX "read reach end of the file\n"); + break; + } else if (ret_count < 0) { + perror(PREFIX ERROR_PREFIX "read failed"); + break; + } + ++val; + if (!verify_chunk(buf, ret_count, val)) + return false; + + total_ret_count += ret_count; + } + printf(PREFIX PREFIX "actually read 0x%lx bytes of data in total\n", + total_ret_count); + + return total_ret_count == expected; +} + +static bool read_hugepage_filemap(int fd, size_t len, + size_t wr_chunk_size, size_t expected) +{ + char buf[MAX_WRITE_READ_CHUNK_SIZE]; + ssize_t ret_count = 0; + ssize_t total_ret_count = 0; + char val = 0; + + printf(PREFIX PREFIX "expect to read 0x%lx bytes of data in total\n", + expected); + while (total_ret_count < len) { + ret_count = read(fd, buf, wr_chunk_size); + if (ret_count == 0) { + printf(PREFIX PREFIX "read reach end of the file\n"); + break; + } else if (ret_count < 0) { + perror(PREFIX ERROR_PREFIX "read failed"); + break; + } + ++val; + if (!verify_chunk(buf, ret_count, val)) + return false; + + total_ret_count += ret_count; + } + printf(PREFIX PREFIX "actually read 0x%lx bytes of data in total\n", + total_ret_count); + + return total_ret_count == expected; +} + +static enum test_status +test_hugetlb_read(int fd, size_t len, size_t wr_chunk_size) +{ + enum test_status status = TEST_SKIPPED; + char *filemap = NULL; + + if (ftruncate(fd, len) < 0) { + perror(PREFIX ERROR_PREFIX "ftruncate failed"); + return status; + } + + filemap = mmap(NULL, len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + if (filemap == MAP_FAILED) { + perror(PREFIX ERROR_PREFIX "mmap for primary mapping failed"); + goto done; + } + + setup_filemap(filemap, len, wr_chunk_size); + status = TEST_FAILED; + + if (read_hugepage_filemap(fd, len, wr_chunk_size, len)) + status = TEST_PASSED; + + munmap(filemap, len); +done: + if (ftruncate(fd, 0) < 0) { + perror(PREFIX ERROR_PREFIX "ftruncate back to 0 failed"); + status = TEST_FAILED; + } + + return status; +} + +static enum test_status +test_hugetlb_read_hwpoison(int fd, size_t len, size_t wr_chunk_size, + bool skip_hwpoison_page) +{ + enum test_status status = TEST_SKIPPED; + char *filemap = NULL; + char *hwp_addr = NULL; + const unsigned long pagesize = getpagesize(); + + if (ftruncate(fd, len) < 0) { + perror(PREFIX ERROR_PREFIX "ftruncate failed"); + return status; + } + + filemap = mmap(NULL, len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + if (filemap == MAP_FAILED) { + perror(PREFIX ERROR_PREFIX "mmap for primary mapping failed"); + goto done; + } + + setup_filemap(filemap, len, wr_chunk_size); + status = TEST_FAILED; + + /* + * Poisoned hugetlb page layout (assume hugepagesize=2MB): + * |<---------------------- 1MB ---------------------->| + * |<---- healthy page ---->|<---- HWPOISON page ----->| + * |<------------------- (1MB - 8KB) ----------------->| + */ + hwp_addr = filemap + len / 2 + pagesize; + if (madvise(hwp_addr, pagesize, MADV_HWPOISON) < 0) { + perror(PREFIX ERROR_PREFIX "MADV_HWPOISON failed"); + goto unmap; + } + + if (!skip_hwpoison_page) { + /* + * Userspace should be able to read (1MB + 1 page) from + * the beginning of the HWPOISONed hugepage. + */ + if (read_hugepage_filemap(fd, len, wr_chunk_size, + len / 2 + pagesize)) + status = TEST_PASSED; + } else { + /* + * Userspace should be able to read (1MB - 2 pages) from + * HWPOISONed hugepage. + */ + if (seek_read_hugepage_filemap(fd, len, wr_chunk_size, + len / 2 + MAX(2 * pagesize, wr_chunk_size), + len / 2 - MAX(2 * pagesize, wr_chunk_size))) + status = TEST_PASSED; + } + +unmap: + munmap(filemap, len); +done: + if (ftruncate(fd, 0) < 0) { + perror(PREFIX ERROR_PREFIX "ftruncate back to 0 failed"); + status = TEST_FAILED; + } + + return status; +} + +static int create_hugetlbfs_file(struct statfs *file_stat) +{ + int fd; + + fd = memfd_create("hugetlb_tmp", MFD_HUGETLB); + if (fd < 0) { + perror(PREFIX ERROR_PREFIX "could not open hugetlbfs file"); + return -1; + } + + memset(file_stat, 0, sizeof(*file_stat)); + if (fstatfs(fd, file_stat)) { + perror(PREFIX ERROR_PREFIX "fstatfs failed"); + goto close; + } + if (file_stat->f_type != HUGETLBFS_MAGIC) { + printf(PREFIX ERROR_PREFIX "not hugetlbfs file\n"); + goto close; + } + + return fd; +close: + close(fd); + return -1; +} + +int main(void) +{ + int fd; + struct statfs file_stat; + enum test_status status; + /* Test read() in different granularity. */ + size_t wr_chunk_sizes[] = { + getpagesize() / 2, getpagesize(), + getpagesize() * 2, getpagesize() * 4 + }; + size_t i; + + for (i = 0; i < ARRAY_SIZE(wr_chunk_sizes); ++i) { + printf("Write/read chunk size=0x%lx\n", + wr_chunk_sizes[i]); + + fd = create_hugetlbfs_file(&file_stat); + if (fd < 0) + goto create_failure; + printf(PREFIX "HugeTLB read regression test...\n"); + status = test_hugetlb_read(fd, file_stat.f_bsize, + wr_chunk_sizes[i]); + printf(PREFIX "HugeTLB read regression test...%s\n", + status_to_str(status)); + close(fd); + if (status == TEST_FAILED) + return -1; + + fd = create_hugetlbfs_file(&file_stat); + if (fd < 0) + goto create_failure; + printf(PREFIX "HugeTLB read HWPOISON test...\n"); + status = test_hugetlb_read_hwpoison(fd, file_stat.f_bsize, + wr_chunk_sizes[i], false); + printf(PREFIX "HugeTLB read HWPOISON test...%s\n", + status_to_str(status)); + close(fd); + if (status == TEST_FAILED) + return -1; + + fd = create_hugetlbfs_file(&file_stat); + if (fd < 0) + goto create_failure; + printf(PREFIX "HugeTLB seek then read HWPOISON test...\n"); + status = test_hugetlb_read_hwpoison(fd, file_stat.f_bsize, + wr_chunk_sizes[i], true); + printf(PREFIX "HugeTLB seek then read HWPOISON test...%s\n", + status_to_str(status)); + close(fd); + if (status == TEST_FAILED) + return -1; + } + + return 0; + +create_failure: + printf(ERROR_PREFIX "Abort test: failed to create hugetlbfs file\n"); + return -1; +} diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 26853badae70..901e950f9138 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -27,8 +27,12 @@ #define KiB 1024u #define MiB (1024 * KiB) +static int mem_fd; static int ksm_fd; static int ksm_full_scans_fd; +static int proc_self_ksm_stat_fd; +static int proc_self_ksm_merging_pages_fd; +static int ksm_use_zero_pages_fd; static int pagemap_fd; static size_t pagesize; @@ -59,6 +63,49 @@ static bool range_maps_duplicates(char *addr, unsigned long size) return false; } +static long get_my_ksm_zero_pages(void) +{ + char buf[200]; + char *substr_ksm_zero; + size_t value_pos; + ssize_t read_size; + unsigned long my_ksm_zero_pages; + + if (!proc_self_ksm_stat_fd) + return 0; + + read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0); + if (read_size < 0) + return -errno; + + buf[read_size] = 0; + + substr_ksm_zero = strstr(buf, "ksm_zero_pages"); + if (!substr_ksm_zero) + return 0; + + value_pos = strcspn(substr_ksm_zero, "0123456789"); + my_ksm_zero_pages = strtol(substr_ksm_zero + value_pos, NULL, 10); + + return my_ksm_zero_pages; +} + +static long get_my_merging_pages(void) +{ + char buf[10]; + ssize_t ret; + + if (proc_self_ksm_merging_pages_fd < 0) + return proc_self_ksm_merging_pages_fd; + + ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0); + if (ret <= 0) + return -errno; + buf[ret] = 0; + + return strtol(buf, NULL, 10); +} + static long ksm_get_full_scans(void) { char buf[10]; @@ -91,11 +138,30 @@ static int ksm_merge(void) return 0; } -static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl) +static int ksm_unmerge(void) +{ + if (write(ksm_fd, "2", 1) != 1) + return -errno; + return 0; +} + +static char *mmap_and_merge_range(char val, unsigned long size, int prot, + bool use_prctl) { char *map; int ret; + /* Stabilize accounting by disabling KSM completely. */ + if (ksm_unmerge()) { + ksft_test_result_fail("Disabling (unmerging) KSM failed\n"); + goto unmap; + } + + if (get_my_merging_pages() > 0) { + ksft_test_result_fail("Still pages merged\n"); + goto unmap; + } + map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); if (map == MAP_FAILED) { @@ -112,6 +178,11 @@ static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl) /* Make sure each page contains the same values to merge them. */ memset(map, val, size); + if (mprotect(map, size, prot)) { + ksft_test_result_skip("mprotect() failed\n"); + goto unmap; + } + if (use_prctl) { ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0); if (ret < 0 && errno == EINVAL) { @@ -131,6 +202,16 @@ static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl) ksft_test_result_fail("Running KSM failed\n"); goto unmap; } + + /* + * Check if anything was merged at all. Ignore the zero page that is + * accounted differently (depending on kernel support). + */ + if (val && !get_my_merging_pages()) { + ksft_test_result_fail("No pages got merged\n"); + goto unmap; + } + return map; unmap: munmap(map, size); @@ -144,7 +225,7 @@ static void test_unmerge(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, false); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false); if (map == MAP_FAILED) return; @@ -159,6 +240,70 @@ unmap: munmap(map, size); } +static void test_unmerge_zero_pages(void) +{ + const unsigned int size = 2 * MiB; + char *map; + unsigned int offs; + unsigned long pages_expected; + + ksft_print_msg("[RUN] %s\n", __func__); + + if (proc_self_ksm_stat_fd < 0) { + ksft_test_result_skip("open(\"/proc/self/ksm_stat\") failed\n"); + return; + } + if (ksm_use_zero_pages_fd < 0) { + ksft_test_result_skip("open \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n"); + return; + } + if (write(ksm_use_zero_pages_fd, "1", 1) != 1) { + ksft_test_result_skip("write \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n"); + return; + } + + /* Let KSM deduplicate zero pages. */ + map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, false); + if (map == MAP_FAILED) + return; + + /* Check if ksm_zero_pages is updated correctly after KSM merging */ + pages_expected = size / pagesize; + if (pages_expected != get_my_ksm_zero_pages()) { + ksft_test_result_fail("'ksm_zero_pages' updated after merging\n"); + goto unmap; + } + + /* Try to unmerge half of the region */ + if (madvise(map, size / 2, MADV_UNMERGEABLE)) { + ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); + goto unmap; + } + + /* Check if ksm_zero_pages is updated correctly after unmerging */ + pages_expected /= 2; + if (pages_expected != get_my_ksm_zero_pages()) { + ksft_test_result_fail("'ksm_zero_pages' updated after unmerging\n"); + goto unmap; + } + + /* Trigger unmerging of the other half by writing to the pages. */ + for (offs = size / 2; offs < size; offs += pagesize) + *((unsigned int *)&map[offs]) = offs; + + /* Now we should have no zeropages remaining. */ + if (get_my_ksm_zero_pages()) { + ksft_test_result_fail("'ksm_zero_pages' updated after write fault\n"); + goto unmap; + } + + /* Check if ksm zero pages are really unmerged */ + ksft_test_result(!range_maps_duplicates(map, size), + "KSM zero pages were unmerged\n"); +unmap: + munmap(map, size); +} + static void test_unmerge_discarded(void) { const unsigned int size = 2 * MiB; @@ -166,7 +311,7 @@ static void test_unmerge_discarded(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, false); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false); if (map == MAP_FAILED) return; @@ -198,7 +343,7 @@ static void test_unmerge_uffd_wp(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, false); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false); if (map == MAP_FAILED) return; @@ -341,7 +486,7 @@ static void test_prctl_unmerge(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, true); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, true); if (map == MAP_FAILED) return; @@ -356,9 +501,42 @@ unmap: munmap(map, size); } +static void test_prot_none(void) +{ + const unsigned int size = 2 * MiB; + char *map; + int i; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0x11, size, PROT_NONE, false); + if (map == MAP_FAILED) + goto unmap; + + /* Store a unique value in each page on one half using ptrace */ + for (i = 0; i < size / 2; i += pagesize) { + lseek(mem_fd, (uintptr_t) map + i, SEEK_SET); + if (write(mem_fd, &i, sizeof(i)) != sizeof(i)) { + ksft_test_result_fail("ptrace write failed\n"); + goto unmap; + } + } + + /* Trigger unsharing on the other half. */ + if (madvise(map + size / 2, size / 2, MADV_UNMERGEABLE)) { + ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); + goto unmap; + } + + ksft_test_result(!range_maps_duplicates(map, size), + "Pages were unmerged\n"); +unmap: + munmap(map, size); +} + int main(int argc, char **argv) { - unsigned int tests = 5; + unsigned int tests = 7; int err; #ifdef __NR_userfaultfd @@ -370,6 +548,9 @@ int main(int argc, char **argv) pagesize = getpagesize(); + mem_fd = open("/proc/self/mem", O_RDWR); + if (mem_fd < 0) + ksft_exit_fail_msg("opening /proc/self/mem failed\n"); ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); if (ksm_fd < 0) ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); @@ -379,13 +560,20 @@ int main(int argc, char **argv) pagemap_fd = open("/proc/self/pagemap", O_RDONLY); if (pagemap_fd < 0) ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); + proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); + proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", + O_RDONLY); + ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); test_unmerge(); + test_unmerge_zero_pages(); test_unmerge_discarded(); #ifdef __NR_userfaultfd test_unmerge_uffd_wp(); #endif + test_prot_none(); + test_prctl(); test_prctl_fork(); test_prctl_unmerge(); diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c index 60547245e479..17bcb07f19f3 100644 --- a/tools/testing/selftests/mm/madv_populate.c +++ b/tools/testing/selftests/mm/madv_populate.c @@ -264,14 +264,35 @@ static void test_softdirty(void) munmap(addr, SIZE); } +static int system_has_softdirty(void) +{ + /* + * There is no way to check if the kernel supports soft-dirty, other + * than by writing to a page and seeing if the bit was set. But the + * tests are intended to check that the bit gets set when it should, so + * doing that check would turn a potentially legitimate fail into a + * skip. Fortunately, we know for sure that arm64 does not support + * soft-dirty. So for now, let's just use the arch as a corse guide. + */ +#if defined(__aarch64__) + return 0; +#else + return 1; +#endif +} + int main(int argc, char **argv) { + int nr_tests = 16; int err; pagesize = getpagesize(); + if (system_has_softdirty()) + nr_tests += 5; + ksft_print_header(); - ksft_set_plan(21); + ksft_set_plan(nr_tests); sense_support(); test_prot_read(); @@ -279,7 +300,8 @@ int main(int argc, char **argv) test_holes(); test_populate_read(); test_populate_write(); - test_softdirty(); + if (system_has_softdirty()) + test_softdirty(); err = ksft_get_fail_cnt(); if (err) diff --git a/tools/testing/selftests/mm/map_populate.c b/tools/testing/selftests/mm/map_populate.c index 240f2d9dae7a..7945d0754875 100644 --- a/tools/testing/selftests/mm/map_populate.c +++ b/tools/testing/selftests/mm/map_populate.c @@ -77,7 +77,7 @@ int main(int argc, char **argv) unsigned long *smap; ftmp = tmpfile(); - BUG_ON(ftmp == 0, "tmpfile()"); + BUG_ON(!ftmp, "tmpfile()"); ret = ftruncate(fileno(ftmp), MMAP_SZ); BUG_ON(ret, "ftruncate()"); diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index 379581567f27..6908569ef406 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -10,12 +10,13 @@ #include <numa.h> #include <numaif.h> #include <sys/mman.h> +#include <sys/prctl.h> #include <sys/types.h> #include <signal.h> #include <time.h> #define TWOMEG (2<<20) -#define RUNTIME (60) +#define RUNTIME (20) #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) @@ -155,10 +156,15 @@ TEST_F_TIMEOUT(migration, shared_anon, 2*RUNTIME) memset(ptr, 0xde, TWOMEG); for (i = 0; i < self->nthreads - 1; i++) { pid = fork(); - if (!pid) + if (!pid) { + prctl(PR_SET_PDEATHSIG, SIGHUP); + /* Parent may have died before prctl so check now. */ + if (getppid() == 1) + kill(getpid(), SIGHUP); access_mem(ptr); - else + } else { self->pids[i] = pid; + } } ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); diff --git a/tools/testing/selftests/mm/mrelease_test.c b/tools/testing/selftests/mm/mrelease_test.c index dca21042b679..d822004a374e 100644 --- a/tools/testing/selftests/mm/mrelease_test.c +++ b/tools/testing/selftests/mm/mrelease_test.c @@ -7,6 +7,7 @@ #include <stdbool.h> #include <stdio.h> #include <stdlib.h> +#include <sys/syscall.h> #include <sys/wait.h> #include <unistd.h> #include <asm-generic/unistd.h> diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 3f26f6e15b2a..3e2bc818d566 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -12,11 +12,14 @@ exitcode=0 usage() { cat <<EOF -usage: ${BASH_SOURCE[0]:-$0} [ -h | -t "<categories>"] +usage: ${BASH_SOURCE[0]:-$0} [ options ] + + -a: run all tests, including extra ones -t: specify specific categories to tests to run -h: display this message -The default behavior is to run all tests. +The default behavior is to run required tests only. If -a is specified, +will run all tests. Alternatively, specific groups tests can be run by passing a string to the -t argument containing one or more of the following categories @@ -55,14 +58,27 @@ separated by spaces: test soft dirty page bit semantics - cow test copy-on-write semantics +- thp + test transparent huge pages +- migration + invoke move_pages(2) to exercise the migration entry code + paths in the kernel +- mkdirty + test handling of code that might set PTE/PMD dirty in + read-only VMAs +- mdwe + test prctl(PR_SET_MDWE, ...) + example: ./run_vmtests.sh -t "hmm mmap ksm" EOF exit 0 } +RUN_ALL=false -while getopts "ht:" OPT; do +while getopts "aht:" OPT; do case ${OPT} in + "a") RUN_ALL=true ;; "h") usage ;; "t") VM_SELFTEST_ITEMS=${OPTARG} ;; esac @@ -85,6 +101,30 @@ test_selected() { fi } +run_gup_matrix() { + # -t: thp=on, -T: thp=off, -H: hugetlb=on + local hugetlb_mb=$(( needmem_KB / 1024 )) + + for huge in -t -T "-H -m $hugetlb_mb"; do + # -u: gup-fast, -U: gup-basic, -a: pin-fast, -b: pin-basic, -L: pin-longterm + for test_cmd in -u -U -a -b -L; do + # -w: write=1, -W: write=0 + for write in -w -W; do + # -S: shared + for share in -S " "; do + # -n: How many pages to fetch together? 512 is special + # because it's default thp size (or 2M on x86), 123 to + # just test partial gup when hit a huge in whatever form + for num in "-n 1" "-n 512" "-n 123"; do + CATEGORY="gup_test" run_test ./gup_test \ + $huge $test_cmd $write $share $num + done + done + done + done + done +} + # get huge pagesize and freepages from /proc/meminfo while read -r name size unit; do if [ "$name" = "HugePages_Free:" ]; then @@ -189,13 +229,16 @@ fi CATEGORY="mmap" run_test ./map_fixed_noreplace -# get_user_pages_fast() benchmark -CATEGORY="gup_test" run_test ./gup_test -u -# pin_user_pages_fast() benchmark -CATEGORY="gup_test" run_test ./gup_test -a +if $RUN_ALL; then + run_gup_matrix +else + # get_user_pages_fast() benchmark + CATEGORY="gup_test" run_test ./gup_test -u + # pin_user_pages_fast() benchmark + CATEGORY="gup_test" run_test ./gup_test -a +fi # Dump pages 0, 19, and 4096, using pin_user_pages: CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000 - CATEGORY="gup_test" run_test ./gup_longterm CATEGORY="userfaultfd" run_test ./uffd-unit-tests @@ -262,6 +305,10 @@ CATEGORY="madv_populate" run_test ./madv_populate CATEGORY="memfd_secret" run_test ./memfd_secret +# KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 +CATEGORY="ksm" run_test ./ksm_tests -H -s 100 +# KSM KSM_MERGE_TIME test with size of 100 +CATEGORY="ksm" run_test ./ksm_tests -P -s 100 # KSM MADV_MERGEABLE test with 10 identical pages CATEGORY="ksm" run_test ./ksm_tests -M -p 10 # KSM unmerge test @@ -290,11 +337,26 @@ then CATEGORY="pkey" run_test ./protection_keys_64 fi -CATEGORY="soft_dirty" run_test ./soft-dirty +if [ -x ./soft-dirty ] +then + CATEGORY="soft_dirty" run_test ./soft-dirty +fi # COW tests CATEGORY="cow" run_test ./cow +CATEGORY="thp" run_test ./khugepaged + +CATEGORY="thp" run_test ./transhuge-stress -d 20 + +CATEGORY="thp" run_test ./split_huge_page_test + +CATEGORY="migration" run_test ./migration + +CATEGORY="mkdirty" run_test ./mkdirty + +CATEGORY="mdwe" run_test ./mdwe_test + echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" exit $exitcode diff --git a/tools/testing/selftests/mm/settings b/tools/testing/selftests/mm/settings index 9abfc60e9e6f..a953c96aa16e 100644 --- a/tools/testing/selftests/mm/settings +++ b/tools/testing/selftests/mm/settings @@ -1 +1 @@ -timeout=45 +timeout=180 diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index 380ab5f0a534..16ed4dfa7359 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -139,7 +139,7 @@ void test_mmap(unsigned long size, unsigned flags) before, after, before - after, size); assert(size == getpagesize() || (before - after) == NUM_PAGES); show(size); - err = munmap(map, size); + err = munmap(map, size * NUM_PAGES); assert(!err); } @@ -222,7 +222,7 @@ int main(void) test_mmap(ps, MAP_HUGETLB | arg); } printf("Testing default huge mmap\n"); - test_mmap(default_hps, SHM_HUGETLB); + test_mmap(default_hps, MAP_HUGETLB); puts("Testing non-huge shmget"); test_shmget(getpagesize(), 0); diff --git a/tools/testing/selftests/mm/transhuge-stress.c b/tools/testing/selftests/mm/transhuge-stress.c index ba9d37ad3a89..c61fb9350b8c 100644 --- a/tools/testing/selftests/mm/transhuge-stress.c +++ b/tools/testing/selftests/mm/transhuge-stress.c @@ -25,13 +25,14 @@ int main(int argc, char **argv) { size_t ram, len; void *ptr, *p; - struct timespec a, b; + struct timespec start, a, b; int i = 0; char *name = NULL; double s; uint8_t *map; size_t map_len; int pagemap_fd; + int duration = 0; ram = sysconf(_SC_PHYS_PAGES); if (ram > SIZE_MAX / psize() / 4) @@ -42,9 +43,11 @@ int main(int argc, char **argv) while (++i < argc) { if (!strcmp(argv[i], "-h")) - errx(1, "usage: %s [size in MiB]", argv[0]); + errx(1, "usage: %s [-f <filename>] [-d <duration>] [size in MiB]", argv[0]); else if (!strcmp(argv[i], "-f")) name = argv[++i]; + else if (!strcmp(argv[i], "-d")) + duration = atoi(argv[++i]); else len = atoll(argv[i]) << 20; } @@ -78,6 +81,8 @@ int main(int argc, char **argv) if (!map) errx(2, "map malloc"); + clock_gettime(CLOCK_MONOTONIC, &start); + while (1) { int nr_succeed = 0, nr_failed = 0, nr_pages = 0; @@ -118,5 +123,8 @@ int main(int argc, char **argv) "%4d succeed, %4d failed, %4d different pages", s, s * 1000 / (len >> HPAGE_SHIFT), len / s / (1 << 20), nr_succeed, nr_failed, nr_pages); + + if (duration > 0 && b.tv_sec - start.tv_sec >= duration) + return 0; } } diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index ba20d7504022..02b89860e193 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -499,6 +499,9 @@ void *uffd_poll_thread(void *arg) int ret; char tmp_chr; + if (!args->handle_fault) + args->handle_fault = uffd_handle_page_fault; + pollfd[0].fd = uffd; pollfd[0].events = POLLIN; pollfd[1].fd = pipefd[cpu*2]; @@ -527,7 +530,7 @@ void *uffd_poll_thread(void *arg) err("unexpected msg event %u\n", msg.event); break; case UFFD_EVENT_PAGEFAULT: - uffd_handle_page_fault(&msg, args); + args->handle_fault(&msg, args); break; case UFFD_EVENT_FORK: close(uffd); diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index 197f5262fe0d..7c4fa964c3b0 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -77,6 +77,9 @@ struct uffd_args { unsigned long missing_faults; unsigned long wp_faults; unsigned long minor_faults; + + /* A custom fault handler; defaults to uffd_handle_page_fault. */ + void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args); }; struct uffd_test_ops { diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 995ff13e74c7..469e0476af26 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -53,21 +53,21 @@ pthread_attr_t attr; do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) const char *examples = - "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" - "./userfaultfd anon 100 99999\n\n" - "# Run share memory test on 1GiB region with 99 bounces:\n" - "./userfaultfd shmem 1000 99\n\n" - "# Run hugetlb memory test on 256MiB region with 50 bounces:\n" - "./userfaultfd hugetlb 256 50\n\n" - "# Run the same hugetlb test but using private file:\n" - "./userfaultfd hugetlb-private 256 50\n\n" - "# 10MiB-~6GiB 999 bounces anonymous test, " - "continue forever unless an error triggers\n" - "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n"; + "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" + "./uffd-stress anon 100 99999\n\n" + "# Run share memory test on 1GiB region with 99 bounces:\n" + "./uffd-stress shmem 1000 99\n\n" + "# Run hugetlb memory test on 256MiB region with 50 bounces:\n" + "./uffd-stress hugetlb 256 50\n\n" + "# Run the same hugetlb test but using private file:\n" + "./uffd-stress hugetlb-private 256 50\n\n" + "# 10MiB-~6GiB 999 bounces anonymous test, " + "continue forever unless an error triggers\n" + "while ./uffd-stress anon $[RANDOM % 6000 + 10] 999; do true; done\n\n"; static void usage(void) { - fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces>\n\n"); + fprintf(stderr, "\nUsage: ./uffd-stress <test type> <MiB> <bounces>\n\n"); fprintf(stderr, "Supported <test type>: anon, hugetlb, " "hugetlb-private, shmem, shmem-private\n\n"); fprintf(stderr, "Examples:\n\n"); @@ -189,10 +189,8 @@ static int stress(struct uffd_args *args) locking_thread, (void *)cpu)) return 1; if (bounces & BOUNCE_POLL) { - if (pthread_create(&uffd_threads[cpu], &attr, - uffd_poll_thread, - (void *)&args[cpu])) - return 1; + if (pthread_create(&uffd_threads[cpu], &attr, uffd_poll_thread, &args[cpu])) + err("uffd_poll_thread create"); } else { if (pthread_create(&uffd_threads[cpu], &attr, uffd_read_thread, @@ -250,6 +248,8 @@ static int userfaultfd_stress(void) struct uffd_args args[nr_cpus]; uint64_t mem_size = nr_pages * page_size; + memset(args, 0, sizeof(struct uffd_args) * nr_cpus); + if (uffd_test_ctx_init(UFFD_FEATURE_WP_UNPOPULATED, NULL)) err("context init failed"); diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 04d91f144d1c..2709a34a39c5 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -951,6 +951,117 @@ static void uffd_zeropage_test(uffd_test_args_t *args) uffd_test_pass(); } +static void uffd_register_poison(int uffd, void *addr, uint64_t len) +{ + uint64_t ioctls = 0; + uint64_t expected = (1 << _UFFDIO_COPY) | (1 << _UFFDIO_POISON); + + if (uffd_register_with_ioctls(uffd, addr, len, true, + false, false, &ioctls)) + err("poison register fail"); + + if ((ioctls & expected) != expected) + err("registered area doesn't support COPY and POISON ioctls"); +} + +static void do_uffdio_poison(int uffd, unsigned long offset) +{ + struct uffdio_poison uffdio_poison = { 0 }; + int ret; + __s64 res; + + uffdio_poison.range.start = (unsigned long) area_dst + offset; + uffdio_poison.range.len = page_size; + uffdio_poison.mode = 0; + ret = ioctl(uffd, UFFDIO_POISON, &uffdio_poison); + res = uffdio_poison.updated; + + if (ret) + err("UFFDIO_POISON error: %"PRId64, (int64_t)res); + else if (res != page_size) + err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res); +} + +static void uffd_poison_handle_fault( + struct uffd_msg *msg, struct uffd_args *args) +{ + unsigned long offset; + + if (msg->event != UFFD_EVENT_PAGEFAULT) + err("unexpected msg event %u", msg->event); + + if (msg->arg.pagefault.flags & + (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR)) + err("unexpected fault type %llu", msg->arg.pagefault.flags); + + offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; + offset &= ~(page_size-1); + + /* Odd pages -> copy zeroed page; even pages -> poison. */ + if (offset & page_size) + copy_page(uffd, offset, false); + else + do_uffdio_poison(uffd, offset); +} + +static void uffd_poison_test(uffd_test_args_t *targs) +{ + pthread_t uffd_mon; + char c; + struct uffd_args args = { 0 }; + struct sigaction act = { 0 }; + unsigned long nr_sigbus = 0; + unsigned long nr; + + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + + uffd_register_poison(uffd, area_dst, nr_pages * page_size); + memset(area_src, 0, nr_pages * page_size); + + args.handle_fault = uffd_poison_handle_fault; + if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) + err("uffd_poll_thread create"); + + sigbuf = &jbuf; + act.sa_sigaction = sighndl; + act.sa_flags = SA_SIGINFO; + if (sigaction(SIGBUS, &act, 0)) + err("sigaction"); + + for (nr = 0; nr < nr_pages; ++nr) { + unsigned long offset = nr * page_size; + const char *bytes = (const char *) area_dst + offset; + const char *i; + + if (sigsetjmp(*sigbuf, 1)) { + /* + * Access below triggered a SIGBUS, which was caught by + * sighndl, which then jumped here. Count this SIGBUS, + * and move on to next page. + */ + ++nr_sigbus; + continue; + } + + for (i = bytes; i < bytes + page_size; ++i) { + if (*i) + err("nonzero byte in area_dst (%p) at %p: %u", + area_dst, i, *i); + } + } + + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + err("pipe write"); + if (pthread_join(uffd_mon, NULL)) + err("pthread_join()"); + + if (nr_sigbus != nr_pages / 2) + err("expected to receive %lu SIGBUS, actually received %lu", + nr_pages / 2, nr_sigbus); + + uffd_test_pass(); +} + /* * Test the returned uffdio_register.ioctls with different register modes. * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test. @@ -1126,6 +1237,12 @@ uffd_test_case_t uffd_tests[] = { UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_HUGETLBFS_SHMEM, }, + { + .name = "poison", + .uffd_fn = uffd_poison_test, + .mem_targets = MEM_ALL, + .uffd_feature_required = UFFD_FEATURE_POISON, + }, }; static void usage(const char *prog) diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c index 7cfaf4a74c57..cfbc501290d3 100644 --- a/tools/testing/selftests/mm/va_high_addr_switch.c +++ b/tools/testing/selftests/mm/va_high_addr_switch.c @@ -292,7 +292,7 @@ static int supported_arch(void) #elif defined(__x86_64__) return 1; #elif defined(__aarch64__) - return 1; + return getpagesize() == PAGE_SIZE; #else return 0; #endif diff --git a/tools/testing/selftests/powerpc/copyloops/asm/export.h b/tools/testing/selftests/powerpc/copyloops/linux/export.h index e6b80d5fbd14..e6b80d5fbd14 100644 --- a/tools/testing/selftests/powerpc/copyloops/asm/export.h +++ b/tools/testing/selftests/powerpc/copyloops/linux/export.h diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 0ad4f12b3d43..5876220d8ff2 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -24,7 +24,7 @@ /* Setting timeout to -1 disables the alarm */ static uint64_t timeout = 120; -int run_test(int (test_function)(void), char *name) +int run_test(int (test_function)(void), const char *name) { bool terminated; int rc, status; @@ -101,7 +101,7 @@ void test_harness_set_timeout(uint64_t time) timeout = time; } -int test_harness(int (test_function)(void), char *name) +int test_harness(int (test_function)(void), const char *name) { int rc; diff --git a/tools/testing/selftests/powerpc/include/subunit.h b/tools/testing/selftests/powerpc/include/subunit.h index 068d55fdf80f..b0bb774617c9 100644 --- a/tools/testing/selftests/powerpc/include/subunit.h +++ b/tools/testing/selftests/powerpc/include/subunit.h @@ -6,37 +6,37 @@ #ifndef _SELFTESTS_POWERPC_SUBUNIT_H #define _SELFTESTS_POWERPC_SUBUNIT_H -static inline void test_start(char *name) +static inline void test_start(const char *name) { printf("test: %s\n", name); } -static inline void test_failure_detail(char *name, char *detail) +static inline void test_failure_detail(const char *name, const char *detail) { printf("failure: %s [%s]\n", name, detail); } -static inline void test_failure(char *name) +static inline void test_failure(const char *name) { printf("failure: %s\n", name); } -static inline void test_error(char *name) +static inline void test_error(const char *name) { printf("error: %s\n", name); } -static inline void test_skip(char *name) +static inline void test_skip(const char *name) { printf("skip: %s\n", name); } -static inline void test_success(char *name) +static inline void test_success(const char *name) { printf("success: %s\n", name); } -static inline void test_finish(char *name, int status) +static inline void test_finish(const char *name, int status) { if (status) test_failure(name); @@ -44,7 +44,7 @@ static inline void test_finish(char *name, int status) test_success(name); } -static inline void test_set_git_version(char *value) +static inline void test_set_git_version(const char *value) { printf("tags: git_version:%s\n", value); } diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 36c30c611457..66d7b2368dd4 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -32,7 +32,7 @@ typedef uint16_t u16; typedef uint8_t u8; void test_harness_set_timeout(uint64_t time); -int test_harness(int (test_function)(void), char *name); +int test_harness(int (test_function)(void), const char *name); int read_auxv(char *buf, ssize_t buf_size); void *find_auxv_entry(int type, char *auxv); diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 4e1a294eec35..0df1a3afc5e2 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -1,15 +1,16 @@ # SPDX-License-Identifier: GPL-2.0-only +bad_accesses +exec_prot hugetlb_vs_thp_test -subpage_prot -tempfile -prot_sao -segv_errors -wild_bctr large_vm_fork_separation -bad_accesses -tlbie_test +large_vm_gpr_corruption pkey_exec_prot pkey_siginfo +prot_sao +segv_errors stack_expansion_ldst stack_expansion_signal -large_vm_gpr_corruption +subpage_prot +tempfile +tlbie_test +wild_bctr diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index cbeeaeae8837..1b39b86849da 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -36,6 +36,7 @@ $(TM_TESTS): CFLAGS += -I../tm -mhtm CFLAGS += $(KHDR_INCLUDES) -fno-pie $(OUTPUT)/ptrace-gpr: ptrace-gpr.S +$(OUTPUT)/ptrace-perf-hwbreak: ptrace-perf-asm.S $(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread $(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S diff --git a/tools/testing/selftests/powerpc/ptrace/child.h b/tools/testing/selftests/powerpc/ptrace/child.h index d7275b7b33dc..df62ff0735f7 100644 --- a/tools/testing/selftests/powerpc/ptrace/child.h +++ b/tools/testing/selftests/powerpc/ptrace/child.h @@ -48,12 +48,12 @@ struct child_sync { } \ } while (0) -#define PARENT_SKIP_IF_UNSUPPORTED(x, sync) \ +#define PARENT_SKIP_IF_UNSUPPORTED(x, sync, msg) \ do { \ if ((x) == -1 && (errno == ENODEV || errno == EINVAL)) { \ (sync)->parent_gave_up = true; \ prod_child(sync); \ - SKIP_IF(1); \ + SKIP_IF_MSG(1, msg); \ } \ } while (0) diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index f6f8596ce8e1..f6da4cb30cd6 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -266,7 +266,7 @@ static int parent(struct shared_info *info, pid_t pid) * to the child. */ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3); - PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync); + PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync, "PKEYs not supported"); PARENT_FAIL_IF(ret, &info->child_sync); info->amr = regs[0]; diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index f75739bbad28..e374c6b7ace6 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -884,7 +884,7 @@ static int perf_hwbreak(void) { srand ( time(NULL) ); - SKIP_IF(!perf_breakpoint_supported()); + SKIP_IF_MSG(!perf_breakpoint_supported(), "Perf breakpoints not supported"); return runtest(); } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index 1345e9b9af0f..75d30d61ab0e 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -64,26 +64,26 @@ static bool dawr_present(struct ppc_debug_info *dbginfo) static void write_var(int len) { - __u8 *pcvar; - __u16 *psvar; - __u32 *pivar; - __u64 *plvar; + volatile __u8 *pcvar; + volatile __u16 *psvar; + volatile __u32 *pivar; + volatile __u64 *plvar; switch (len) { case 1: - pcvar = (__u8 *)&glvar; + pcvar = (volatile __u8 *)&glvar; *pcvar = 0xff; break; case 2: - psvar = (__u16 *)&glvar; + psvar = (volatile __u16 *)&glvar; *psvar = 0xffff; break; case 4: - pivar = (__u32 *)&glvar; + pivar = (volatile __u32 *)&glvar; *pivar = 0xffffffff; break; case 8: - plvar = (__u64 *)&glvar; + plvar = (volatile __u64 *)&glvar; *plvar = 0xffffffffffffffffLL; break; } @@ -98,16 +98,16 @@ static void read_var(int len) switch (len) { case 1: - cvar = (__u8)glvar; + cvar = (volatile __u8)glvar; break; case 2: - svar = (__u16)glvar; + svar = (volatile __u16)glvar; break; case 4: - ivar = (__u32)glvar; + ivar = (volatile __u32)glvar; break; case 8: - lvar = (__u64)glvar; + lvar = (volatile __u64)glvar; break; } } @@ -603,7 +603,7 @@ static int ptrace_hwbreak(void) wait(NULL); get_dbginfo(child_pid, &dbginfo); - SKIP_IF(dbginfo.num_data_bps == 0); + SKIP_IF_MSG(dbginfo.num_data_bps == 0, "No data breakpoints present"); dawr = dawr_present(&dbginfo); run_tests(child_pid, &dbginfo, dawr); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S new file mode 100644 index 000000000000..9aa2e58f3189 --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include <ppc-asm.h> + +.global same_watch_addr_load +.global same_watch_addr_trap + +FUNC_START(same_watch_addr_child) + nop +same_watch_addr_load: + ld 0,0(3) + nop +same_watch_addr_trap: + trap + blr +FUNC_END(same_watch_addr_child) + + +.global perf_then_ptrace_load1 +.global perf_then_ptrace_load2 +.global perf_then_ptrace_trap + +FUNC_START(perf_then_ptrace_child) + nop +perf_then_ptrace_load1: + ld 0,0(3) +perf_then_ptrace_load2: + ld 0,0(4) + nop +perf_then_ptrace_trap: + trap + blr +FUNC_END(perf_then_ptrace_child) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c index 3344e74a97b4..a0a0b9bb5854 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c @@ -1,659 +1,445 @@ // SPDX-License-Identifier: GPL-2.0+ -#include <stdio.h> -#include <string.h> -#include <signal.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <linux/hw_breakpoint.h> -#include <linux/perf_event.h> + #include <asm/unistd.h> -#include <sys/ptrace.h> +#include <linux/hw_breakpoint.h> +#include <linux/ptrace.h> +#include <memory.h> +#include <stdlib.h> #include <sys/wait.h> -#include "ptrace.h" -char data[16]; +#include "utils.h" -/* Overlapping address range */ -volatile __u64 *ptrace_data1 = (__u64 *)&data[0]; -volatile __u64 *perf_data1 = (__u64 *)&data[4]; +/* + * Child subroutine that performs a load on the address, then traps + */ +void same_watch_addr_child(unsigned long *addr); -/* Non-overlapping address range */ -volatile __u64 *ptrace_data2 = (__u64 *)&data[0]; -volatile __u64 *perf_data2 = (__u64 *)&data[8]; +/* Address of the ld instruction in same_watch_addr_child() */ +extern char same_watch_addr_load[]; -static unsigned long pid_max_addr(void) -{ - FILE *fp; - char *line, *c; - char addr[100]; - size_t len = 0; - - fp = fopen("/proc/kallsyms", "r"); - if (!fp) { - printf("Failed to read /proc/kallsyms. Exiting..\n"); - exit(EXIT_FAILURE); - } +/* Address of the end trap instruction in same_watch_addr_child() */ +extern char same_watch_addr_trap[]; - while (getline(&line, &len, fp) != -1) { - if (!strstr(line, "pid_max") || strstr(line, "pid_max_max") || - strstr(line, "pid_max_min")) - continue; +/* + * Child subroutine that performs a load on the first address, then a load on + * the second address (with no instructions separating this from the first + * load), then traps. + */ +void perf_then_ptrace_child(unsigned long *first_addr, unsigned long *second_addr); - strncpy(addr, line, len < 100 ? len : 100); - c = strchr(addr, ' '); - *c = '\0'; - return strtoul(addr, &c, 16); - } - fclose(fp); - printf("Could not find pix_max. Exiting..\n"); - exit(EXIT_FAILURE); - return -1; -} +/* Address of the first ld instruction in perf_then_ptrace_child() */ +extern char perf_then_ptrace_load1[]; -static void perf_user_event_attr_set(struct perf_event_attr *attr, __u64 addr, __u64 len) -{ - memset(attr, 0, sizeof(struct perf_event_attr)); - attr->type = PERF_TYPE_BREAKPOINT; - attr->size = sizeof(struct perf_event_attr); - attr->bp_type = HW_BREAKPOINT_R; - attr->bp_addr = addr; - attr->bp_len = len; - attr->exclude_kernel = 1; - attr->exclude_hv = 1; -} +/* Address of the second ld instruction in perf_then_ptrace_child() */ +extern char perf_then_ptrace_load2[]; -static void perf_kernel_event_attr_set(struct perf_event_attr *attr) +/* Address of the end trap instruction in perf_then_ptrace_child() */ +extern char perf_then_ptrace_trap[]; + +static inline long sys_ptrace(long request, pid_t pid, unsigned long addr, unsigned long data) { - memset(attr, 0, sizeof(struct perf_event_attr)); - attr->type = PERF_TYPE_BREAKPOINT; - attr->size = sizeof(struct perf_event_attr); - attr->bp_type = HW_BREAKPOINT_R; - attr->bp_addr = pid_max_addr(); - attr->bp_len = sizeof(unsigned long); - attr->exclude_user = 1; - attr->exclude_hv = 1; + return syscall(__NR_ptrace, request, pid, addr, data); } -static int perf_cpu_event_open(int cpu, __u64 addr, __u64 len) +static long ptrace_traceme(void) { - struct perf_event_attr attr; - - perf_user_event_attr_set(&attr, addr, len); - return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); + return sys_ptrace(PTRACE_TRACEME, 0, 0, 0); } -static int perf_thread_event_open(pid_t child_pid, __u64 addr, __u64 len) +static long ptrace_getregs(pid_t pid, struct pt_regs *result) { - struct perf_event_attr attr; - - perf_user_event_attr_set(&attr, addr, len); - return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); + return sys_ptrace(PTRACE_GETREGS, pid, 0, (unsigned long)result); } -static int perf_thread_cpu_event_open(pid_t child_pid, int cpu, __u64 addr, __u64 len) +static long ptrace_setregs(pid_t pid, struct pt_regs *result) { - struct perf_event_attr attr; - - perf_user_event_attr_set(&attr, addr, len); - return syscall(__NR_perf_event_open, &attr, child_pid, cpu, -1, 0); + return sys_ptrace(PTRACE_SETREGS, pid, 0, (unsigned long)result); } -static int perf_thread_kernel_event_open(pid_t child_pid) +static long ptrace_cont(pid_t pid, long signal) { - struct perf_event_attr attr; - - perf_kernel_event_attr_set(&attr); - return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); + return sys_ptrace(PTRACE_CONT, pid, 0, signal); } -static int perf_cpu_kernel_event_open(int cpu) +static long ptrace_singlestep(pid_t pid, long signal) { - struct perf_event_attr attr; - - perf_kernel_event_attr_set(&attr); - return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); + return sys_ptrace(PTRACE_SINGLESTEP, pid, 0, signal); } -static int child(void) +static long ppc_ptrace_gethwdbginfo(pid_t pid, struct ppc_debug_info *dbginfo) { - int ret; - - ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); - if (ret) { - printf("Error: PTRACE_TRACEME failed\n"); - return 0; - } - kill(getpid(), SIGUSR1); /* --> parent (SIGUSR1) */ - - return 0; + return sys_ptrace(PPC_PTRACE_GETHWDBGINFO, pid, 0, (unsigned long)dbginfo); } -static void ptrace_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type, - __u64 addr, int len) +static long ppc_ptrace_sethwdbg(pid_t pid, struct ppc_hw_breakpoint *bp_info) { - info->version = 1; - info->trigger_type = type; - info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; - info->addr = addr; - info->addr2 = addr + len; - info->condition_value = 0; - if (!len) - info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; - else - info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; + return sys_ptrace(PPC_PTRACE_SETHWDEBUG, pid, 0, (unsigned long)bp_info); } -static int ptrace_open(pid_t child_pid, __u64 wp_addr, int len) +static long ppc_ptrace_delhwdbg(pid_t pid, int bp_id) { - struct ppc_hw_breakpoint info; - - ptrace_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len); - return ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info); + return sys_ptrace(PPC_PTRACE_DELHWDEBUG, pid, 0L, bp_id); } -static int test1(pid_t child_pid) +static long ptrace_getreg_pc(pid_t pid, void **pc) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing cpu event by perf) - * if (addr range overlaps) - * fail; - */ + struct pt_regs regs; + long err; - perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd < 0) - return -1; + err = ptrace_getregs(pid, ®s); + if (err) + return err; - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd > 0 || errno != ENOSPC) - ret = -1; + *pc = (void *)regs.nip; - close(perf_fd); - return ret; + return 0; } -static int test2(pid_t child_pid) +static long ptrace_setreg_pc(pid_t pid, void *pc) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing cpu event by perf) - * if (addr range does not overlaps) - * allow; - */ + struct pt_regs regs; + long err; - perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2)); - if (perf_fd < 0) - return -1; - - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) { - ret = -1; - goto perf_close; - } - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - -perf_close: - close(perf_fd); - return ret; -} + err = ptrace_getregs(pid, ®s); + if (err) + return err; -static int test3(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing thread event by perf on the same thread) - * if (addr range overlaps) - * fail; - */ - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, - sizeof(*perf_data1)); - if (perf_fd < 0) - return -1; + regs.nip = (unsigned long)pc; - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd > 0 || errno != ENOSPC) - ret = -1; + err = ptrace_setregs(pid, ®s); + if (err) + return err; - close(perf_fd); - return ret; + return 0; } -static int test4(pid_t child_pid) +static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, + int group_fd, unsigned long flags) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing thread event by perf on the same thread) - * if (addr range does not overlaps) - * fail; - */ - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, - sizeof(*perf_data2)); - if (perf_fd < 0) - return -1; - - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) { - ret = -1; - goto perf_close; - } - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - -perf_close: - close(perf_fd); - return ret; + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); } -static int test5(pid_t child_pid) +static void perf_user_event_attr_set(struct perf_event_attr *attr, void *addr, u64 len) { - int perf_fd; - int ptrace_fd; - int cpid; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing thread event by perf on the different thread) - * allow; - */ - cpid = fork(); - if (!cpid) { - /* Temporary Child */ - pause(); - exit(EXIT_SUCCESS); - } - - perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd < 0) { - ret = -1; - goto kill_child; - } - - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) { - ret = -1; - goto perf_close; - } + memset(attr, 0, sizeof(struct perf_event_attr)); - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); -perf_close: - close(perf_fd); -kill_child: - kill(cpid, SIGINT); - return ret; + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = HW_BREAKPOINT_R; + attr->bp_addr = (u64)addr; + attr->bp_len = len; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; } -static int test6(pid_t child_pid) +static int perf_watchpoint_open(pid_t child_pid, void *addr, u64 len) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread kernel event by perf) - * if (existing thread event by ptrace on the same thread) - * allow; - * -- OR -- - * if (new per cpu kernel event by perf) - * if (existing thread event by ptrace) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; - - perf_fd = perf_thread_kernel_event_open(child_pid); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); - - perf_fd = perf_cpu_kernel_event_open(0); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); + struct perf_event_attr attr; -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + perf_user_event_attr_set(&attr, addr, len); + return perf_event_open(&attr, child_pid, -1, -1, 0); } -static int test7(pid_t child_pid) +static int perf_read_counter(int perf_fd, u64 *count) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range overlaps) - * fail; + /* + * A perf counter is retrieved by the read() syscall. It contains + * the current count as 8 bytes that are interpreted as a u64 */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; + ssize_t len = read(perf_fd, count, sizeof(*count)); - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, - sizeof(*perf_data1)); - if (perf_fd > 0 || errno != ENOSPC) - ret = -1; + if (len != sizeof(*count)) + return -1; - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + return 0; } -static int test8(pid_t child_pid) +static void ppc_ptrace_init_breakpoint(struct ppc_hw_breakpoint *info, + int type, void *addr, int len) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range does not overlaps) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) - return -1; - - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, - sizeof(*perf_data2)); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); - -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + info->version = 1; + info->trigger_type = type; + info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + info->addr = (u64)addr; + info->addr2 = (u64)addr + len; + info->condition_value = 0; + if (!len) + info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; + else + info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; } -static int test9(pid_t child_pid) +/* + * Checks if we can place at least 2 watchpoints on the child process + */ +static int check_watchpoints(pid_t pid) { - int perf_fd; - int ptrace_fd; - int cpid; - int ret = 0; - - /* Test: - * if (new per thread event by perf) - * if (existing thread event by ptrace on the other thread) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; - - cpid = fork(); - if (!cpid) { - /* Temporary Child */ - pause(); - exit(EXIT_SUCCESS); - } + struct ppc_debug_info dbginfo; - perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd < 0) { - ret = -1; - goto kill_child; - } - close(perf_fd); + FAIL_IF_MSG(ppc_ptrace_gethwdbginfo(pid, &dbginfo), "PPC_PTRACE_GETHWDBGINFO failed"); + SKIP_IF_MSG(dbginfo.num_data_bps <= 1, "Not enough data watchpoints (need at least 2)"); -kill_child: - kill(cpid, SIGINT); - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + return 0; } -static int test10(pid_t child_pid) +/* + * Wrapper around a plain fork() call that sets up the child for + * ptrace-ing. Both the parent and child return from this, though + * the child is stopped until ptrace_cont(pid) is run by the parent. + */ +static int ptrace_fork_child(pid_t *pid) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range overlaps) - * fail; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; + int status; - perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd > 0 || errno != ENOSPC) - ret = -1; - - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + *pid = fork(); -static int test11(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range does not overlap) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) - return -1; + if (*pid < 0) + FAIL_IF_MSG(1, "Failed to fork child"); - perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2)); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; + if (!*pid) { + FAIL_IF_EXIT_MSG(ptrace_traceme(), "PTRACE_TRACEME failed"); + FAIL_IF_EXIT_MSG(raise(SIGSTOP), "Child failed to raise SIGSTOP"); + } else { + /* Synchronise on child SIGSTOP */ + FAIL_IF_MSG(waitpid(*pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); } - close(perf_fd); -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + return 0; } -static int test12(pid_t child_pid) +/* + * Tests the interaction between ptrace and perf watching the same data. + * + * We expect ptrace to take 'priority', as it is has before-execute + * semantics. + * + * The perf counter should not be incremented yet because perf has after-execute + * semantics. E.g., if ptrace changes the child PC, we don't even execute the + * instruction at all. + * + * When the child is stopped for ptrace, we test both continue and single step. + * Both should increment the perf counter. We also test changing the PC somewhere + * different and stepping, which should not increment the perf counter. + */ +int same_watch_addr_test(void) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread and per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range overlaps) - * fail; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; + struct ppc_hw_breakpoint bp_info; /* ptrace breakpoint info */ + int bp_id; /* Breakpoint handle of ptrace watchpoint */ + int perf_fd; /* File descriptor of perf performance counter */ + u64 perf_count; /* Most recently fetched perf performance counter value */ + pid_t pid; /* PID of child process */ + void *pc; /* Most recently fetched child PC value */ + int status; /* Stop status of child after waitpid */ + unsigned long value; /* Dummy value to be read/written to by child */ + int err; + + err = ptrace_fork_child(&pid); + if (err) + return err; + + if (!pid) { + same_watch_addr_child(&value); + exit(1); + } - perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd > 0 || errno != ENOSPC) - ret = -1; + err = check_watchpoints(pid); + if (err) + return err; - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + /* Place a perf watchpoint counter on value */ + perf_fd = perf_watchpoint_open(pid, &value, sizeof(value)); + FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter"); -static int test13(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread and per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range does not overlap) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) - return -1; + /* Place a ptrace watchpoint on value */ + ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ, &value, sizeof(value)); + bp_id = ppc_ptrace_sethwdbg(pid, &bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); - perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data2, sizeof(*perf_data2)); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); + /* Let the child run. It should stop on the ptrace watchpoint */ + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction"); -static int test14(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int cpid; - int ret = 0; - - /* Test: - * if (new per thread and per cpu event by perf) - * if (existing thread event by ptrace on the other thread) - * allow; + /* + * We stopped before executing the load, so perf should not have + * recorded any events yet */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; - - cpid = fork(); - if (!cpid) { - /* Temporary Child */ - pause(); - exit(EXIT_SUCCESS); - } - - perf_fd = perf_thread_cpu_event_open(cpid, 0, (__u64)perf_data1, - sizeof(*perf_data1)); - if (perf_fd < 0) { - ret = -1; - goto kill_child; - } - close(perf_fd); - -kill_child: - kill(cpid, SIGINT); - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 0, "perf recorded unexpected event"); + + /* Single stepping over the load should increment the perf counter */ + FAIL_IF_MSG(ptrace_singlestep(pid, 0), "Failed to single step child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load + 4, "Failed to single step load instruction"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 1, "perf counter did not increment"); + + /* + * Set up a ptrace watchpoint on the value again and trigger it. + * The perf counter should not have incremented because we do not + * execute the load yet. + */ + FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint"); + bp_id = ppc_ptrace_sethwdbg(pid, &bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 1, "perf counter should not have changed"); + + /* Continuing over the load should increment the perf counter */ + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 2, "perf counter did not increment"); + + /* + * If we set the child PC back to the load instruction, then continue, + * we should reach the end trap (because ptrace is one-shot) and have + * another perf event. + */ + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 3, "perf counter did not increment"); + + /* + * If we set the child PC back to the load instruction, set a ptrace + * watchpoint on the load, then continue, we should immediately get + * the ptrace trap without incrementing the perf counter + */ + FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint"); + bp_id = ppc_ptrace_sethwdbg(pid, &bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed"); + + /* + * If we change the PC while stopped on the load instruction, we should + * not increment the perf counter (because ptrace is before-execute, + * perf is after-execute). + */ + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load + 4), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); -static int do_test(const char *msg, int (*fun)(pid_t arg), pid_t arg) -{ - int ret; + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed"); - ret = fun(arg); - if (ret) - printf("%s: Error\n", msg); - else - printf("%s: Ok\n", msg); - return ret; -} + /* Clean up child */ + FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child"); -char *desc[14] = { - "perf cpu event -> ptrace thread event (Overlapping)", - "perf cpu event -> ptrace thread event (Non-overlapping)", - "perf thread event -> ptrace same thread event (Overlapping)", - "perf thread event -> ptrace same thread event (Non-overlapping)", - "perf thread event -> ptrace other thread event", - "ptrace thread event -> perf kernel event", - "ptrace thread event -> perf same thread event (Overlapping)", - "ptrace thread event -> perf same thread event (Non-overlapping)", - "ptrace thread event -> perf other thread event", - "ptrace thread event -> perf cpu event (Overlapping)", - "ptrace thread event -> perf cpu event (Non-overlapping)", - "ptrace thread event -> perf same thread & cpu event (Overlapping)", - "ptrace thread event -> perf same thread & cpu event (Non-overlapping)", - "ptrace thread event -> perf other thread & cpu event", -}; - -static int test(pid_t child_pid) -{ - int ret = TEST_PASS; - - ret |= do_test(desc[0], test1, child_pid); - ret |= do_test(desc[1], test2, child_pid); - ret |= do_test(desc[2], test3, child_pid); - ret |= do_test(desc[3], test4, child_pid); - ret |= do_test(desc[4], test5, child_pid); - ret |= do_test(desc[5], test6, child_pid); - ret |= do_test(desc[6], test7, child_pid); - ret |= do_test(desc[7], test8, child_pid); - ret |= do_test(desc[8], test9, child_pid); - ret |= do_test(desc[9], test10, child_pid); - ret |= do_test(desc[10], test11, child_pid); - ret |= do_test(desc[11], test12, child_pid); - ret |= do_test(desc[12], test13, child_pid); - ret |= do_test(desc[13], test14, child_pid); - - return ret; + return 0; } -static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo) +/* + * Tests the interaction between ptrace and perf when: + * 1. perf watches a value + * 2. ptrace watches a different value + * 3. The perf value is read, then the ptrace value is read immediately after + * + * A breakpoint implementation may accidentally misattribute/skip one of + * the ptrace or perf handlers, as interrupt based work is done after perf + * and before ptrace. + * + * We expect the perf counter to increment before the ptrace watchpoint + * triggers. + */ +int perf_then_ptrace_test(void) { - if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) { - perror("Can't get breakpoint info"); - exit(-1); + struct ppc_hw_breakpoint bp_info; /* ptrace breakpoint info */ + int bp_id; /* Breakpoint handle of ptrace watchpoint */ + int perf_fd; /* File descriptor of perf performance counter */ + u64 perf_count; /* Most recently fetched perf performance counter value */ + pid_t pid; /* PID of child process */ + void *pc; /* Most recently fetched child PC value */ + int status; /* Stop status of child after waitpid */ + unsigned long perf_value; /* Dummy value to be watched by perf */ + unsigned long ptrace_value; /* Dummy value to be watched by ptrace */ + int err; + + err = ptrace_fork_child(&pid); + if (err) + return err; + + /* + * If we are the child, run a subroutine that reads the perf value, + * then reads the ptrace value with consecutive load instructions + */ + if (!pid) { + perf_then_ptrace_child(&perf_value, &ptrace_value); + exit(0); } -} -static int ptrace_perf_hwbreak(void) -{ - int ret; - pid_t child_pid; - struct ppc_debug_info dbginfo; + err = check_watchpoints(pid); + if (err) + return err; - child_pid = fork(); - if (!child_pid) - return child(); + /* Place a perf watchpoint counter */ + perf_fd = perf_watchpoint_open(pid, &perf_value, sizeof(perf_value)); + FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter"); - /* parent */ - wait(NULL); /* <-- child (SIGUSR1) */ + /* Place a ptrace watchpoint */ + ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ, + &ptrace_value, sizeof(ptrace_value)); + bp_id = ppc_ptrace_sethwdbg(pid, &bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); - get_dbginfo(child_pid, &dbginfo); - SKIP_IF(dbginfo.num_data_bps <= 1); + /* Let the child run. It should stop on the ptrace watchpoint */ + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); - ret = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); - SKIP_IF(ret < 0); - close(ret); + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != perf_then_ptrace_load2, "Child did not stop on ptrace load"); - ret = test(child_pid); + /* perf should have recorded the first load */ + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 1, "perf counter did not increment"); - ptrace(PTRACE_CONT, child_pid, NULL, 0); - return ret; + /* Clean up child */ + FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child"); + + return 0; } int main(int argc, char *argv[]) { - return test_harness(ptrace_perf_hwbreak, "ptrace-perf-hwbreak"); + int err = 0; + + err |= test_harness(same_watch_addr_test, "same_watch_addr"); + err |= test_harness(perf_then_ptrace_test, "perf_then_ptrace"); + + return err; } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index bc454f899124..d89474377f11 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -192,7 +192,7 @@ static int parent(struct shared_info *info, pid_t pid) * to the child. */ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3); - PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync); + PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync, "PKEYs not supported"); PARENT_FAIL_IF(ret, &info->child_sync); info->amr1 = info->amr2 = regs[0]; diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c index 4436ca9d3caf..14726c77a6ce 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c @@ -79,7 +79,7 @@ int ptrace_tar(void) int ret, status; // TAR was added in v2.07 - SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_ARCH_2_07), "TAR requires ISA 2.07 compatible hardware"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT); pid = fork(); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c index 5dc152b162df..7c70d62587c2 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c @@ -112,8 +112,8 @@ int ptrace_tm_gpr(void) pid_t pid; int ret, status; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); pid = fork(); if (pid < 0) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c index 458cc1a70ccf..6c17ed099969 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c @@ -118,8 +118,8 @@ int ptrace_tm_spd_gpr(void) pid_t pid; int ret, status; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT); pid = fork(); if (pid < 0) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c index e112a34fbe59..afd8dc2e2097 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c @@ -128,8 +128,8 @@ int ptrace_tm_spd_tar(void) pid_t pid; int ret, status; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT); pid = fork(); if (pid == 0) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c index 40133d49fe39..14d2fac8f237 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c @@ -128,8 +128,8 @@ int ptrace_tm_spd_vsx(void) pid_t pid; int ret, status, i; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT); for (i = 0; i < 128; i++) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c index 880ba6a29a48..e64cdb04cecf 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c @@ -113,8 +113,8 @@ int ptrace_tm_spr(void) pid_t pid; int ret, status; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(struct shared), 0777|IPC_CREAT); shm_id1 = shmget(IPC_PRIVATE, sizeof(int), 0777|IPC_CREAT); pid = fork(); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c index d0db6df0f0ea..3963d4b0429f 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c @@ -116,8 +116,8 @@ int ptrace_tm_tar(void) pid_t pid; int ret, status; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); pid = fork(); if (pid == 0) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c index 4f05ce4fd282..8c925d734a72 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c @@ -112,8 +112,8 @@ int ptrace_tm_vsx(void) pid_t pid; int ret, status, i; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); for (i = 0; i < 128; i++) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c index cb9875f764ca..11bc624574fe 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c @@ -61,7 +61,7 @@ int ptrace_vsx(void) pid_t pid; int ret, status, i; - SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX)); + SKIP_IF_MSG(!have_hwcap(PPC_FEATURE_HAS_VSX), "Don't have VSX"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); diff --git a/tools/testing/selftests/powerpc/stringloops/asm/export.h b/tools/testing/selftests/powerpc/stringloops/linux/export.h index 2d14a9b4248c..2d14a9b4248c 100644 --- a/tools/testing/selftests/powerpc/stringloops/asm/export.h +++ b/tools/testing/selftests/powerpc/stringloops/linux/export.h diff --git a/tools/testing/selftests/powerpc/vphn/asm/lppaca.h b/tools/testing/selftests/powerpc/vphn/asm/lppaca.h deleted file mode 120000 index 942b1d00999c..000000000000 --- a/tools/testing/selftests/powerpc/vphn/asm/lppaca.h +++ /dev/null @@ -1 +0,0 @@ -../../../../../../arch/powerpc/include/asm/lppaca.h
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/vphn/asm/vphn.h b/tools/testing/selftests/powerpc/vphn/asm/vphn.h new file mode 120000 index 000000000000..3a0b2a00171c --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/asm/vphn.h @@ -0,0 +1 @@ +../../../../../../arch/powerpc/include/asm/vphn.h
\ No newline at end of file diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c index 7588428b8fcd..b16c13688b88 100644 --- a/tools/testing/selftests/proc/proc-empty-vm.c +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -1,3 +1,4 @@ +#if defined __amd64__ || defined __i386__ /* * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com> * @@ -37,6 +38,10 @@ #include <sys/wait.h> #include <unistd.h> +#ifdef __amd64__ +#define TEST_VSYSCALL +#endif + /* * 0: vsyscall VMA doesn't exist vsyscall=none * 1: vsyscall VMA is --xp vsyscall=xonly @@ -77,7 +82,7 @@ static const char proc_pid_smaps_vsyscall_1[] = "Swap: 0 kB\n" "SwapPss: 0 kB\n" "Locked: 0 kB\n" -"THPeligible: 0\n" +"THPeligible: 0\n" /* * "ProtectionKey:" field is conditional. It is possible to check it as well, * but I don't have such machine. @@ -107,7 +112,7 @@ static const char proc_pid_smaps_vsyscall_2[] = "Swap: 0 kB\n" "SwapPss: 0 kB\n" "Locked: 0 kB\n" -"THPeligible: 0\n" +"THPeligible: 0\n" /* * "ProtectionKey:" field is conditional. It is possible to check it as well, * but I'm too tired. @@ -119,6 +124,7 @@ static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) _exit(EXIT_FAILURE); } +#ifdef TEST_VSYSCALL static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___) { _exit(g_vsyscall); @@ -170,6 +176,7 @@ static void vsyscall(void) exit(1); } } +#endif static int test_proc_pid_maps(pid_t pid) { @@ -299,7 +306,9 @@ int main(void) { int rv = EXIT_SUCCESS; +#ifdef TEST_VSYSCALL vsyscall(); +#endif switch (g_vsyscall) { case 0: @@ -346,6 +355,14 @@ int main(void) #ifdef __amd64__ munmap(NULL, ((size_t)1 << 47) - 4096); +#elif defined __i386__ + { + size_t len; + + for (len = -4096;; len -= 4096) { + munmap(NULL, len); + } + } #else #error "implement 'unmap everything'" #endif @@ -386,3 +403,9 @@ int main(void) return rv; } +#else +int main(void) +{ + return 4; +} +#endif diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile index f4b3d5c9af5b..4a9ff515a3a0 100644 --- a/tools/testing/selftests/riscv/Makefile +++ b/tools/testing/selftests/riscv/Makefile @@ -5,7 +5,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),riscv)) -RISCV_SUBTARGETS ?= hwprobe vector +RISCV_SUBTARGETS ?= hwprobe vector mm else RISCV_SUBTARGETS := endif diff --git a/tools/testing/selftests/riscv/mm/.gitignore b/tools/testing/selftests/riscv/mm/.gitignore new file mode 100644 index 000000000000..5c2c57cb950c --- /dev/null +++ b/tools/testing/selftests/riscv/mm/.gitignore @@ -0,0 +1,2 @@ +mmap_bottomup +mmap_default diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile new file mode 100644 index 000000000000..11e0f0568923 --- /dev/null +++ b/tools/testing/selftests/riscv/mm/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2021 ARM Limited +# Originally tools/testing/arm64/abi/Makefile + +# Additional include paths needed by kselftest.h and local headers +CFLAGS += -D_GNU_SOURCE -std=gnu99 -I. + +TEST_GEN_FILES := testcases/mmap_default testcases/mmap_bottomup + +TEST_PROGS := testcases/run_mmap.sh + +include ../../lib.mk + +$(OUTPUT)/mm: testcases/mmap_default.c testcases/mmap_bottomup.c testcases/mmap_tests.h + $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^ diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c new file mode 100644 index 000000000000..b29379f7e478 --- /dev/null +++ b/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <sys/mman.h> +#include <testcases/mmap_test.h> + +#include "../../kselftest_harness.h" + +TEST(infinite_rlimit) +{ +// Only works on 64 bit +#if __riscv_xlen == 64 + struct addresses mmap_addresses; + + EXPECT_EQ(BOTTOM_UP, memory_layout()); + + do_mmaps(&mmap_addresses); + + EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr); + + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr); + EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr); +#endif +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_default.c b/tools/testing/selftests/riscv/mm/testcases/mmap_default.c new file mode 100644 index 000000000000..d1accb91b726 --- /dev/null +++ b/tools/testing/selftests/riscv/mm/testcases/mmap_default.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <sys/mman.h> +#include <testcases/mmap_test.h> + +#include "../../kselftest_harness.h" + +TEST(default_rlimit) +{ +// Only works on 64 bit +#if __riscv_xlen == 64 + struct addresses mmap_addresses; + + EXPECT_EQ(TOP_DOWN, memory_layout()); + + do_mmaps(&mmap_addresses); + + EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr); + + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr); + EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr); +#endif +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_test.h b/tools/testing/selftests/riscv/mm/testcases/mmap_test.h new file mode 100644 index 000000000000..9b8434f62f57 --- /dev/null +++ b/tools/testing/selftests/riscv/mm/testcases/mmap_test.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _TESTCASES_MMAP_TEST_H +#define _TESTCASES_MMAP_TEST_H +#include <sys/mman.h> +#include <sys/resource.h> +#include <stddef.h> + +#define TOP_DOWN 0 +#define BOTTOM_UP 1 + +struct addresses { + int *no_hint; + int *on_37_addr; + int *on_38_addr; + int *on_46_addr; + int *on_47_addr; + int *on_55_addr; + int *on_56_addr; +}; + +static inline void do_mmaps(struct addresses *mmap_addresses) +{ + /* + * Place all of the hint addresses on the boundaries of mmap + * sv39, sv48, sv57 + * User addresses end at 1<<38, 1<<47, 1<<56 respectively + */ + void *on_37_bits = (void *)(1UL << 37); + void *on_38_bits = (void *)(1UL << 38); + void *on_46_bits = (void *)(1UL << 46); + void *on_47_bits = (void *)(1UL << 47); + void *on_55_bits = (void *)(1UL << 55); + void *on_56_bits = (void *)(1UL << 56); + + int prot = PROT_READ | PROT_WRITE; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + + mmap_addresses->no_hint = + mmap(NULL, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_37_addr = + mmap(on_37_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_38_addr = + mmap(on_38_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_46_addr = + mmap(on_46_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_47_addr = + mmap(on_47_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_55_addr = + mmap(on_55_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_56_addr = + mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0); +} + +static inline int memory_layout(void) +{ + int prot = PROT_READ | PROT_WRITE; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + + void *value1 = mmap(NULL, sizeof(int), prot, flags, 0, 0); + void *value2 = mmap(NULL, sizeof(int), prot, flags, 0, 0); + + return value2 > value1; +} +#endif /* _TESTCASES_MMAP_TEST_H */ diff --git a/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh b/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh new file mode 100755 index 000000000000..ca5ad7c48bad --- /dev/null +++ b/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +original_stack_limit=$(ulimit -s) + +./mmap_default + +# Force mmap_bottomup to be ran with bottomup memory due to +# the unlimited stack +ulimit -s unlimited +./mmap_bottomup +ulimit -s $original_stack_limit diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 6327c9c400e0..507555714b1d 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -41,7 +41,6 @@ CONFIG_KALLSYMS=y CONFIG_BUG=y CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y CONFIG_JUMP_LABEL=y -CONFIG_EMBEDDED=n CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_SHMEM=y diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 598135d3162b..7e8c937627dd 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -18,7 +18,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \ - corrupt_xstate_header amx lam + corrupt_xstate_header amx lam test_shadow_stack # Some selftests require 32bit support enabled also on 64bit systems TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall diff --git a/tools/testing/selftests/x86/test_shadow_stack.c b/tools/testing/selftests/x86/test_shadow_stack.c new file mode 100644 index 000000000000..757e6527f67e --- /dev/null +++ b/tools/testing/selftests/x86/test_shadow_stack.c @@ -0,0 +1,884 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This program test's basic kernel shadow stack support. It enables shadow + * stack manual via the arch_prctl(), instead of relying on glibc. It's + * Makefile doesn't compile with shadow stack support, so it doesn't rely on + * any particular glibc. As a result it can't do any operations that require + * special glibc shadow stack support (longjmp(), swapcontext(), etc). Just + * stick to the basics and hope the compiler doesn't do anything strange. + */ + +#define _GNU_SOURCE + +#include <sys/syscall.h> +#include <asm/mman.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <stdbool.h> +#include <x86intrin.h> +#include <asm/prctl.h> +#include <sys/prctl.h> +#include <stdint.h> +#include <signal.h> +#include <pthread.h> +#include <sys/ioctl.h> +#include <linux/userfaultfd.h> +#include <setjmp.h> +#include <sys/ptrace.h> +#include <sys/signal.h> +#include <linux/elf.h> + +/* + * Define the ABI defines if needed, so people can run the tests + * without building the headers. + */ +#ifndef __NR_map_shadow_stack +#define __NR_map_shadow_stack 453 + +#define SHADOW_STACK_SET_TOKEN (1ULL << 0) + +#define ARCH_SHSTK_ENABLE 0x5001 +#define ARCH_SHSTK_DISABLE 0x5002 +#define ARCH_SHSTK_LOCK 0x5003 +#define ARCH_SHSTK_UNLOCK 0x5004 +#define ARCH_SHSTK_STATUS 0x5005 + +#define ARCH_SHSTK_SHSTK (1ULL << 0) +#define ARCH_SHSTK_WRSS (1ULL << 1) + +#define NT_X86_SHSTK 0x204 +#endif + +#define SS_SIZE 0x200000 +#define PAGE_SIZE 0x1000 + +#if (__GNUC__ < 8) || (__GNUC__ == 8 && __GNUC_MINOR__ < 5) +int main(int argc, char *argv[]) +{ + printf("[SKIP]\tCompiler does not support CET.\n"); + return 0; +} +#else +void write_shstk(unsigned long *addr, unsigned long val) +{ + asm volatile("wrssq %[val], (%[addr])\n" + : "=m" (addr) + : [addr] "r" (addr), [val] "r" (val)); +} + +static inline unsigned long __attribute__((always_inline)) get_ssp(void) +{ + unsigned long ret = 0; + + asm volatile("xor %0, %0; rdsspq %0" : "=r" (ret)); + return ret; +} + +/* + * For use in inline enablement of shadow stack. + * + * The program can't return from the point where shadow stack gets enabled + * because there will be no address on the shadow stack. So it can't use + * syscall() for enablement, since it is a function. + * + * Based on code from nolibc.h. Keep a copy here because this can't pull in all + * of nolibc.h. + */ +#define ARCH_PRCTL(arg1, arg2) \ +({ \ + long _ret; \ + register long _num asm("eax") = __NR_arch_prctl; \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +void *create_shstk(void *addr) +{ + return (void *)syscall(__NR_map_shadow_stack, addr, SS_SIZE, SHADOW_STACK_SET_TOKEN); +} + +void *create_normal_mem(void *addr) +{ + return mmap(addr, SS_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); +} + +void free_shstk(void *shstk) +{ + munmap(shstk, SS_SIZE); +} + +int reset_shstk(void *shstk) +{ + return madvise(shstk, SS_SIZE, MADV_DONTNEED); +} + +void try_shstk(unsigned long new_ssp) +{ + unsigned long ssp; + + printf("[INFO]\tnew_ssp = %lx, *new_ssp = %lx\n", + new_ssp, *((unsigned long *)new_ssp)); + + ssp = get_ssp(); + printf("[INFO]\tchanging ssp from %lx to %lx\n", ssp, new_ssp); + + asm volatile("rstorssp (%0)\n":: "r" (new_ssp)); + asm volatile("saveprevssp"); + printf("[INFO]\tssp is now %lx\n", get_ssp()); + + /* Switch back to original shadow stack */ + ssp -= 8; + asm volatile("rstorssp (%0)\n":: "r" (ssp)); + asm volatile("saveprevssp"); +} + +int test_shstk_pivot(void) +{ + void *shstk = create_shstk(0); + + if (shstk == MAP_FAILED) { + printf("[FAIL]\tError creating shadow stack: %d\n", errno); + return 1; + } + try_shstk((unsigned long)shstk + SS_SIZE - 8); + free_shstk(shstk); + + printf("[OK]\tShadow stack pivot\n"); + return 0; +} + +int test_shstk_faults(void) +{ + unsigned long *shstk = create_shstk(0); + + /* Read shadow stack, test if it's zero to not get read optimized out */ + if (*shstk != 0) + goto err; + + /* Wrss memory that was already read. */ + write_shstk(shstk, 1); + if (*shstk != 1) + goto err; + + /* Page out memory, so we can wrss it again. */ + if (reset_shstk((void *)shstk)) + goto err; + + write_shstk(shstk, 1); + if (*shstk != 1) + goto err; + + printf("[OK]\tShadow stack faults\n"); + return 0; + +err: + return 1; +} + +unsigned long saved_ssp; +unsigned long saved_ssp_val; +volatile bool segv_triggered; + +void __attribute__((noinline)) violate_ss(void) +{ + saved_ssp = get_ssp(); + saved_ssp_val = *(unsigned long *)saved_ssp; + + /* Corrupt shadow stack */ + printf("[INFO]\tCorrupting shadow stack\n"); + write_shstk((void *)saved_ssp, 0); +} + +void segv_handler(int signum, siginfo_t *si, void *uc) +{ + printf("[INFO]\tGenerated shadow stack violation successfully\n"); + + segv_triggered = true; + + /* Fix shadow stack */ + write_shstk((void *)saved_ssp, saved_ssp_val); +} + +int test_shstk_violation(void) +{ + struct sigaction sa = {}; + + sa.sa_sigaction = segv_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + segv_triggered = false; + + /* Make sure segv_triggered is set before violate_ss() */ + asm volatile("" : : : "memory"); + + violate_ss(); + + signal(SIGSEGV, SIG_DFL); + + printf("[OK]\tShadow stack violation test\n"); + + return !segv_triggered; +} + +/* Gup test state */ +#define MAGIC_VAL 0x12345678 +bool is_shstk_access; +void *shstk_ptr; +int fd; + +void reset_test_shstk(void *addr) +{ + if (shstk_ptr) + free_shstk(shstk_ptr); + shstk_ptr = create_shstk(addr); +} + +void test_access_fix_handler(int signum, siginfo_t *si, void *uc) +{ + printf("[INFO]\tViolation from %s\n", is_shstk_access ? "shstk access" : "normal write"); + + segv_triggered = true; + + /* Fix shadow stack */ + if (is_shstk_access) { + reset_test_shstk(shstk_ptr); + return; + } + + free_shstk(shstk_ptr); + create_normal_mem(shstk_ptr); +} + +bool test_shstk_access(void *ptr) +{ + is_shstk_access = true; + segv_triggered = false; + write_shstk(ptr, MAGIC_VAL); + + asm volatile("" : : : "memory"); + + return segv_triggered; +} + +bool test_write_access(void *ptr) +{ + is_shstk_access = false; + segv_triggered = false; + *(unsigned long *)ptr = MAGIC_VAL; + + asm volatile("" : : : "memory"); + + return segv_triggered; +} + +bool gup_write(void *ptr) +{ + unsigned long val; + + lseek(fd, (unsigned long)ptr, SEEK_SET); + if (write(fd, &val, sizeof(val)) < 0) + return 1; + + return 0; +} + +bool gup_read(void *ptr) +{ + unsigned long val; + + lseek(fd, (unsigned long)ptr, SEEK_SET); + if (read(fd, &val, sizeof(val)) < 0) + return 1; + + return 0; +} + +int test_gup(void) +{ + struct sigaction sa = {}; + int status; + pid_t pid; + + sa.sa_sigaction = test_access_fix_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + segv_triggered = false; + + fd = open("/proc/self/mem", O_RDWR); + if (fd == -1) + return 1; + + reset_test_shstk(0); + if (gup_read(shstk_ptr)) + return 1; + if (test_shstk_access(shstk_ptr)) + return 1; + printf("[INFO]\tGup read -> shstk access success\n"); + + reset_test_shstk(0); + if (gup_write(shstk_ptr)) + return 1; + if (test_shstk_access(shstk_ptr)) + return 1; + printf("[INFO]\tGup write -> shstk access success\n"); + + reset_test_shstk(0); + if (gup_read(shstk_ptr)) + return 1; + if (!test_write_access(shstk_ptr)) + return 1; + printf("[INFO]\tGup read -> write access success\n"); + + reset_test_shstk(0); + if (gup_write(shstk_ptr)) + return 1; + if (!test_write_access(shstk_ptr)) + return 1; + printf("[INFO]\tGup write -> write access success\n"); + + close(fd); + + /* COW/gup test */ + reset_test_shstk(0); + pid = fork(); + if (!pid) { + fd = open("/proc/self/mem", O_RDWR); + if (fd == -1) + exit(1); + + if (gup_write(shstk_ptr)) { + close(fd); + exit(1); + } + close(fd); + exit(0); + } + waitpid(pid, &status, 0); + if (WEXITSTATUS(status)) { + printf("[FAIL]\tWrite in child failed\n"); + return 1; + } + if (*(unsigned long *)shstk_ptr == MAGIC_VAL) { + printf("[FAIL]\tWrite in child wrote through to shared memory\n"); + return 1; + } + + printf("[INFO]\tCow gup write -> write access success\n"); + + free_shstk(shstk_ptr); + + signal(SIGSEGV, SIG_DFL); + + printf("[OK]\tShadow gup test\n"); + + return 0; +} + +int test_mprotect(void) +{ + struct sigaction sa = {}; + + sa.sa_sigaction = test_access_fix_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + segv_triggered = false; + + /* mprotect a shadow stack as read only */ + reset_test_shstk(0); + if (mprotect(shstk_ptr, SS_SIZE, PROT_READ) < 0) { + printf("[FAIL]\tmprotect(PROT_READ) failed\n"); + return 1; + } + + /* try to wrss it and fail */ + if (!test_shstk_access(shstk_ptr)) { + printf("[FAIL]\tShadow stack access to read-only memory succeeded\n"); + return 1; + } + + /* + * The shadow stack was reset above to resolve the fault, make the new one + * read-only. + */ + if (mprotect(shstk_ptr, SS_SIZE, PROT_READ) < 0) { + printf("[FAIL]\tmprotect(PROT_READ) failed\n"); + return 1; + } + + /* then back to writable */ + if (mprotect(shstk_ptr, SS_SIZE, PROT_WRITE | PROT_READ) < 0) { + printf("[FAIL]\tmprotect(PROT_WRITE) failed\n"); + return 1; + } + + /* then wrss to it and succeed */ + if (test_shstk_access(shstk_ptr)) { + printf("[FAIL]\tShadow stack access to mprotect() writable memory failed\n"); + return 1; + } + + free_shstk(shstk_ptr); + + signal(SIGSEGV, SIG_DFL); + + printf("[OK]\tmprotect() test\n"); + + return 0; +} + +char zero[4096]; + +static void *uffd_thread(void *arg) +{ + struct uffdio_copy req; + int uffd = *(int *)arg; + struct uffd_msg msg; + int ret; + + while (1) { + ret = read(uffd, &msg, sizeof(msg)); + if (ret > 0) + break; + else if (errno == EAGAIN) + continue; + return (void *)1; + } + + req.dst = msg.arg.pagefault.address; + req.src = (__u64)zero; + req.len = 4096; + req.mode = 0; + + if (ioctl(uffd, UFFDIO_COPY, &req)) + return (void *)1; + + return (void *)0; +} + +int test_userfaultfd(void) +{ + struct uffdio_register uffdio_register; + struct uffdio_api uffdio_api; + struct sigaction sa = {}; + pthread_t thread; + void *res; + int uffd; + + sa.sa_sigaction = test_access_fix_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd < 0) { + printf("[SKIP]\tUserfaultfd unavailable.\n"); + return 0; + } + + reset_test_shstk(0); + + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + if (ioctl(uffd, UFFDIO_API, &uffdio_api)) + goto err; + + uffdio_register.range.start = (__u64)shstk_ptr; + uffdio_register.range.len = 4096; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) + goto err; + + if (pthread_create(&thread, NULL, &uffd_thread, &uffd)) + goto err; + + reset_shstk(shstk_ptr); + test_shstk_access(shstk_ptr); + + if (pthread_join(thread, &res)) + goto err; + + if (test_shstk_access(shstk_ptr)) + goto err; + + free_shstk(shstk_ptr); + + signal(SIGSEGV, SIG_DFL); + + if (!res) + printf("[OK]\tUserfaultfd test\n"); + return !!res; +err: + free_shstk(shstk_ptr); + close(uffd); + signal(SIGSEGV, SIG_DFL); + return 1; +} + +/* Simple linked list for keeping track of mappings in test_guard_gap() */ +struct node { + struct node *next; + void *mapping; +}; + +/* + * This tests whether mmap will place other mappings in a shadow stack's guard + * gap. The steps are: + * 1. Finds an empty place by mapping and unmapping something. + * 2. Map a shadow stack in the middle of the known empty area. + * 3. Map a bunch of PAGE_SIZE mappings. These will use the search down + * direction, filling any gaps until it encounters the shadow stack's + * guard gap. + * 4. When a mapping lands below the shadow stack from step 2, then all + * of the above gaps are filled. The search down algorithm will have + * looked at the shadow stack gaps. + * 5. See if it landed in the gap. + */ +int test_guard_gap(void) +{ + void *free_area, *shstk, *test_map = (void *)0xFFFFFFFFFFFFFFFF; + struct node *head = NULL, *cur; + + free_area = mmap(0, SS_SIZE * 3, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + munmap(free_area, SS_SIZE * 3); + + shstk = create_shstk(free_area + SS_SIZE); + if (shstk == MAP_FAILED) + return 1; + + while (test_map > shstk) { + test_map = mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (test_map == MAP_FAILED) + return 1; + cur = malloc(sizeof(*cur)); + cur->mapping = test_map; + + cur->next = head; + head = cur; + } + + while (head) { + cur = head; + head = cur->next; + munmap(cur->mapping, PAGE_SIZE); + free(cur); + } + + free_shstk(shstk); + + if (shstk - test_map - PAGE_SIZE != PAGE_SIZE) + return 1; + + printf("[OK]\tGuard gap test\n"); + + return 0; +} + +/* + * Too complicated to pull it out of the 32 bit header, but also get the + * 64 bit one needed above. Just define a copy here. + */ +#define __NR_compat_sigaction 67 + +/* + * Call 32 bit signal handler to get 32 bit signals ABI. Make sure + * to push the registers that will get clobbered. + */ +int sigaction32(int signum, const struct sigaction *restrict act, + struct sigaction *restrict oldact) +{ + register long syscall_reg asm("eax") = __NR_compat_sigaction; + register long signum_reg asm("ebx") = signum; + register long act_reg asm("ecx") = (long)act; + register long oldact_reg asm("edx") = (long)oldact; + int ret = 0; + + asm volatile ("int $0x80;" + : "=a"(ret), "=m"(oldact) + : "r"(syscall_reg), "r"(signum_reg), "r"(act_reg), + "r"(oldact_reg) + : "r8", "r9", "r10", "r11" + ); + + return ret; +} + +sigjmp_buf jmp_buffer; + +void segv_gp_handler(int signum, siginfo_t *si, void *uc) +{ + segv_triggered = true; + + /* + * To work with old glibc, this can't rely on siglongjmp working with + * shadow stack enabled, so disable shadow stack before siglongjmp(). + */ + ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK); + siglongjmp(jmp_buffer, -1); +} + +/* + * Transition to 32 bit mode and check that a #GP triggers a segfault. + */ +int test_32bit(void) +{ + struct sigaction sa = {}; + struct sigaction *sa32; + + /* Create sigaction in 32 bit address range */ + sa32 = mmap(0, 4096, PROT_READ | PROT_WRITE, + MAP_32BIT | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + sa32->sa_flags = SA_SIGINFO; + + sa.sa_sigaction = segv_gp_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + + segv_triggered = false; + + /* Make sure segv_triggered is set before triggering the #GP */ + asm volatile("" : : : "memory"); + + /* + * Set handler to somewhere in 32 bit address space + */ + sa32->sa_handler = (void *)sa32; + if (sigaction32(SIGUSR1, sa32, NULL)) + return 1; + + if (!sigsetjmp(jmp_buffer, 1)) + raise(SIGUSR1); + + if (segv_triggered) + printf("[OK]\t32 bit test\n"); + + return !segv_triggered; +} + +void segv_handler_ptrace(int signum, siginfo_t *si, void *uc) +{ + /* The SSP adjustment caused a segfault. */ + exit(0); +} + +int test_ptrace(void) +{ + unsigned long saved_ssp, ssp = 0; + struct sigaction sa= {}; + struct iovec iov; + int status; + int pid; + + iov.iov_base = &ssp; + iov.iov_len = sizeof(ssp); + + pid = fork(); + if (!pid) { + ssp = get_ssp(); + + sa.sa_sigaction = segv_handler_ptrace; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + ptrace(PTRACE_TRACEME, NULL, NULL, NULL); + /* + * The parent will tweak the SSP and return from this function + * will #CP. + */ + raise(SIGTRAP); + + exit(1); + } + + while (waitpid(pid, &status, 0) != -1 && WSTOPSIG(status) != SIGTRAP); + + if (ptrace(PTRACE_GETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tFailed to PTRACE_GETREGS\n"); + goto out_kill; + } + + if (!ssp) { + printf("[INFO]\tPtrace child SSP was 0\n"); + goto out_kill; + } + + saved_ssp = ssp; + + iov.iov_len = 0; + if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tToo small size accepted via PTRACE_SETREGS\n"); + goto out_kill; + } + + iov.iov_len = sizeof(ssp) + 1; + if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tToo large size accepted via PTRACE_SETREGS\n"); + goto out_kill; + } + + ssp += 1; + if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tUnaligned SSP written via PTRACE_SETREGS\n"); + goto out_kill; + } + + ssp = 0xFFFFFFFFFFFF0000; + if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tKernel range SSP written via PTRACE_SETREGS\n"); + goto out_kill; + } + + /* + * Tweak the SSP so the child with #CP when it resumes and returns + * from raise() + */ + ssp = saved_ssp + 8; + iov.iov_len = sizeof(ssp); + if (ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tFailed to PTRACE_SETREGS\n"); + goto out_kill; + } + + if (ptrace(PTRACE_DETACH, pid, NULL, NULL)) { + printf("[INFO]\tFailed to PTRACE_DETACH\n"); + goto out_kill; + } + + waitpid(pid, &status, 0); + if (WEXITSTATUS(status)) + return 1; + + printf("[OK]\tPtrace test\n"); + return 0; + +out_kill: + kill(pid, SIGKILL); + return 1; +} + +int main(int argc, char *argv[]) +{ + int ret = 0; + + if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) { + printf("[SKIP]\tCould not enable Shadow stack\n"); + return 1; + } + + if (ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK)) { + ret = 1; + printf("[FAIL]\tDisabling shadow stack failed\n"); + } + + if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) { + printf("[SKIP]\tCould not re-enable Shadow stack\n"); + return 1; + } + + if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_WRSS)) { + printf("[SKIP]\tCould not enable WRSS\n"); + ret = 1; + goto out; + } + + /* Should have succeeded if here, but this is a test, so double check. */ + if (!get_ssp()) { + printf("[FAIL]\tShadow stack disabled\n"); + return 1; + } + + if (test_shstk_pivot()) { + ret = 1; + printf("[FAIL]\tShadow stack pivot\n"); + goto out; + } + + if (test_shstk_faults()) { + ret = 1; + printf("[FAIL]\tShadow stack fault test\n"); + goto out; + } + + if (test_shstk_violation()) { + ret = 1; + printf("[FAIL]\tShadow stack violation test\n"); + goto out; + } + + if (test_gup()) { + ret = 1; + printf("[FAIL]\tShadow shadow stack gup\n"); + goto out; + } + + if (test_mprotect()) { + ret = 1; + printf("[FAIL]\tShadow shadow mprotect test\n"); + goto out; + } + + if (test_userfaultfd()) { + ret = 1; + printf("[FAIL]\tUserfaultfd test\n"); + goto out; + } + + if (test_guard_gap()) { + ret = 1; + printf("[FAIL]\tGuard gap test\n"); + goto out; + } + + if (test_ptrace()) { + ret = 1; + printf("[FAIL]\tptrace test\n"); + } + + if (test_32bit()) { + ret = 1; + printf("[FAIL]\t32 bit test\n"); + goto out; + } + + return ret; + +out: + /* + * Disable shadow stack before the function returns, or there will be a + * shadow stack violation. + */ + if (ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK)) { + ret = 1; + printf("[FAIL]\tDisabling shadow stack failed\n"); + } + + return ret; +} +#endif diff --git a/tools/workqueue/wq_dump.py b/tools/workqueue/wq_dump.py new file mode 100644 index 000000000000..d0df5833f2c1 --- /dev/null +++ b/tools/workqueue/wq_dump.py @@ -0,0 +1,177 @@ +#!/usr/bin/env drgn +# +# Copyright (C) 2023 Tejun Heo <tj@kernel.org> +# Copyright (C) 2023 Meta Platforms, Inc. and affiliates. + +desc = """ +This is a drgn script to show the current workqueue configuration. For more +info on drgn, visit https://github.com/osandov/drgn. + +Affinity Scopes +=============== + +Shows the CPUs that can be used for unbound workqueues and how they will be +grouped by each available affinity type. For each type: + + nr_pods number of CPU pods in the affinity type + pod_cpus CPUs in each pod + pod_node NUMA node for memory allocation for each pod + cpu_pod pod that each CPU is associated to + +Worker Pools +============ + +Lists all worker pools indexed by their ID. For each pool: + + ref number of pool_workqueue's associated with this pool + nice nice value of the worker threads in the pool + idle number of idle workers + workers number of all workers + cpu CPU the pool is associated with (per-cpu pool) + cpus CPUs the workers in the pool can run on (unbound pool) + +Workqueue CPU -> pool +===================== + +Lists all workqueues along with their type and worker pool association. For +each workqueue: + + NAME TYPE[,FLAGS] POOL_ID... + + NAME name of the workqueue + TYPE percpu, unbound or ordered + FLAGS S: strict affinity scope + POOL_ID worker pool ID associated with each possible CPU +""" + +import sys + +import drgn +from drgn.helpers.linux.list import list_for_each_entry,list_empty +from drgn.helpers.linux.percpu import per_cpu_ptr +from drgn.helpers.linux.cpumask import for_each_cpu,for_each_possible_cpu +from drgn.helpers.linux.idr import idr_for_each + +import argparse +parser = argparse.ArgumentParser(description=desc, + formatter_class=argparse.RawTextHelpFormatter) +args = parser.parse_args() + +def err(s): + print(s, file=sys.stderr, flush=True) + sys.exit(1) + +def cpumask_str(cpumask): + output = "" + base = 0 + v = 0 + for cpu in for_each_cpu(cpumask[0]): + while cpu - base >= 32: + output += f'{hex(v)} ' + base += 32 + v = 0 + v |= 1 << (cpu - base) + if v > 0: + output += f'{v:08x}' + return output.strip() + +worker_pool_idr = prog['worker_pool_idr'] +workqueues = prog['workqueues'] +wq_unbound_cpumask = prog['wq_unbound_cpumask'] +wq_pod_types = prog['wq_pod_types'] +wq_affn_dfl = prog['wq_affn_dfl'] +wq_affn_names = prog['wq_affn_names'] + +WQ_UNBOUND = prog['WQ_UNBOUND'] +WQ_ORDERED = prog['__WQ_ORDERED'] +WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM'] + +WQ_AFFN_CPU = prog['WQ_AFFN_CPU'] +WQ_AFFN_SMT = prog['WQ_AFFN_SMT'] +WQ_AFFN_CACHE = prog['WQ_AFFN_CACHE'] +WQ_AFFN_NUMA = prog['WQ_AFFN_NUMA'] +WQ_AFFN_SYSTEM = prog['WQ_AFFN_SYSTEM'] + +print('Affinity Scopes') +print('===============') + +print(f'wq_unbound_cpumask={cpumask_str(wq_unbound_cpumask)}') + +def print_pod_type(pt): + print(f' nr_pods {pt.nr_pods.value_()}') + + print(' pod_cpus', end='') + for pod in range(pt.nr_pods): + print(f' [{pod}]={cpumask_str(pt.pod_cpus[pod])}', end='') + print('') + + print(' pod_node', end='') + for pod in range(pt.nr_pods): + print(f' [{pod}]={pt.pod_node[pod].value_()}', end='') + print('') + + print(f' cpu_pod ', end='') + for cpu in for_each_possible_cpu(prog): + print(f' [{cpu}]={pt.cpu_pod[cpu].value_()}', end='') + print('') + +for affn in [WQ_AFFN_CPU, WQ_AFFN_SMT, WQ_AFFN_CACHE, WQ_AFFN_NUMA, WQ_AFFN_SYSTEM]: + print('') + print(f'{wq_affn_names[affn].string_().decode().upper()}{" (default)" if affn == wq_affn_dfl else ""}') + print_pod_type(wq_pod_types[affn]) + +print('') +print('Worker Pools') +print('============') + +max_pool_id_len = 0 +max_ref_len = 0 +for pi, pool in idr_for_each(worker_pool_idr): + pool = drgn.Object(prog, 'struct worker_pool', address=pool) + max_pool_id_len = max(max_pool_id_len, len(f'{pi}')) + max_ref_len = max(max_ref_len, len(f'{pool.refcnt.value_()}')) + +for pi, pool in idr_for_each(worker_pool_idr): + pool = drgn.Object(prog, 'struct worker_pool', address=pool) + print(f'pool[{pi:0{max_pool_id_len}}] ref={pool.refcnt.value_():{max_ref_len}} nice={pool.attrs.nice.value_():3} ', end='') + print(f'idle/workers={pool.nr_idle.value_():3}/{pool.nr_workers.value_():3} ', end='') + if pool.cpu >= 0: + print(f'cpu={pool.cpu.value_():3}', end='') + else: + print(f'cpus={cpumask_str(pool.attrs.cpumask)}', end='') + print(f' pod_cpus={cpumask_str(pool.attrs.__pod_cpumask)}', end='') + if pool.attrs.affn_strict: + print(' strict', end='') + print('') + +print('') +print('Workqueue CPU -> pool') +print('=====================') + +print('[ workqueue \ type CPU', end='') +for cpu in for_each_possible_cpu(prog): + print(f' {cpu:{max_pool_id_len}}', end='') +print(' dfl]') + +for wq in list_for_each_entry('struct workqueue_struct', workqueues.address_of_(), 'list'): + print(f'{wq.name.string_().decode()[-24:]:24}', end='') + if wq.flags & WQ_UNBOUND: + if wq.flags & WQ_ORDERED: + print(' ordered ', end='') + else: + print(' unbound', end='') + if wq.unbound_attrs.affn_strict: + print(',S ', end='') + else: + print(' ', end='') + else: + print(' percpu ', end='') + + for cpu in for_each_possible_cpu(prog): + pool_id = per_cpu_ptr(wq.cpu_pwq, cpu)[0].pool.id.value_() + field_len = max(len(str(cpu)), max_pool_id_len) + print(f' {pool_id:{field_len}}', end='') + + if wq.flags & WQ_UNBOUND: + print(f' {wq.dfl_pwq.pool.id.value_():{max_pool_id_len}}', end='') + print('') diff --git a/tools/workqueue/wq_monitor.py b/tools/workqueue/wq_monitor.py index 6e258d123e8c..a8856a9c45dc 100644 --- a/tools/workqueue/wq_monitor.py +++ b/tools/workqueue/wq_monitor.py @@ -20,8 +20,11 @@ https://github.com/osandov/drgn. and got excluded from concurrency management to avoid stalling other work items. - CMwake The number of concurrency-management wake-ups while executing a - work item of the workqueue. + CMW/RPR For per-cpu workqueues, the number of concurrency-management + wake-ups while executing a work item of the workqueue. For + unbound workqueues, the number of times a worker was repatriated + to its affinity scope after being migrated to an off-scope CPU by + the scheduler. mayday The number of times the rescuer was requested while waiting for new worker creation. @@ -65,6 +68,7 @@ PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED'] # work items completed exec PWQ_STAT_CPU_TIME = prog['PWQ_STAT_CPU_TIME'] # total CPU time consumed PWQ_STAT_CPU_INTENSIVE = prog['PWQ_STAT_CPU_INTENSIVE'] # wq_cpu_intensive_thresh_us violations PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP'] # concurrency-management worker wakeups +PWQ_STAT_REPATRIATED = prog['PWQ_STAT_REPATRIATED'] # unbound workers brought back into scope PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY'] # maydays to rescuer PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED'] # linked work items executed by rescuer PWQ_NR_STATS = prog['PWQ_NR_STATS'] @@ -89,22 +93,25 @@ class WqStats: 'cpu_time' : self.stats[PWQ_STAT_CPU_TIME], 'cpu_intensive' : self.stats[PWQ_STAT_CPU_INTENSIVE], 'cm_wakeup' : self.stats[PWQ_STAT_CM_WAKEUP], + 'repatriated' : self.stats[PWQ_STAT_REPATRIATED], 'mayday' : self.stats[PWQ_STAT_MAYDAY], 'rescued' : self.stats[PWQ_STAT_RESCUED], } def table_header_str(): return f'{"":>24} {"total":>8} {"infl":>5} {"CPUtime":>8} '\ - f'{"CPUitsv":>7} {"CMwake":>7} {"mayday":>7} {"rescued":>7}' + f'{"CPUitsv":>7} {"CMW/RPR":>7} {"mayday":>7} {"rescued":>7}' def table_row_str(self): cpu_intensive = '-' - cm_wakeup = '-' + cmw_rpr = '-' mayday = '-' rescued = '-' - if not self.unbound: + if self.unbound: + cmw_rpr = str(self.stats[PWQ_STAT_REPATRIATED]); + else: cpu_intensive = str(self.stats[PWQ_STAT_CPU_INTENSIVE]) - cm_wakeup = str(self.stats[PWQ_STAT_CM_WAKEUP]) + cmw_rpr = str(self.stats[PWQ_STAT_CM_WAKEUP]) if self.mem_reclaim: mayday = str(self.stats[PWQ_STAT_MAYDAY]) @@ -115,7 +122,7 @@ class WqStats: f'{max(self.stats[PWQ_STAT_STARTED] - self.stats[PWQ_STAT_COMPLETED], 0):5} ' \ f'{self.stats[PWQ_STAT_CPU_TIME] / 1000000:8.1f} ' \ f'{cpu_intensive:>7} ' \ - f'{cm_wakeup:>7} ' \ + f'{cmw_rpr:>7} ' \ f'{mayday:>7} ' \ f'{rescued:>7} ' return out.rstrip(':') |