aboutsummaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests')
-rw-r--r--tools/testing/selftests/Makefile16
-rw-r--r--tools/testing/selftests/android/Makefile8
-rw-r--r--tools/testing/selftests/android/ion/.gitignore1
-rw-r--r--tools/testing/selftests/android/ion/Makefile5
-rw-r--r--tools/testing/selftests/android/ion/config1
-rwxr-xr-xtools/testing/selftests/android/ion/ion_test.sh7
-rw-r--r--tools/testing/selftests/android/ion/ionmap_test.c136
-rw-r--r--tools/testing/selftests/android/ion/ionutils.c6
-rw-r--r--tools/testing/selftests/bpf/.gitignore3
-rw-r--r--tools/testing/selftests/bpf/config2
-rw-r--r--tools/testing/selftests/bpf/test_progs.c4
-rw-r--r--tools/testing/selftests/bpf/test_sock.c1
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c1
-rwxr-xr-xtools/testing/selftests/bpf/test_sock_addr.sh4
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c5
-rw-r--r--tools/testing/selftests/breakpoints/step_after_suspend_test.c6
-rw-r--r--tools/testing/selftests/cgroup/Makefile10
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c331
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.h41
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c1015
-rwxr-xr-xtools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh14
-rwxr-xr-xtools/testing/selftests/cpufreq/main.sh5
-rwxr-xr-xtools/testing/selftests/drivers/usb/usbip/usbip_test.sh198
-rwxr-xr-xtools/testing/selftests/efivarfs/efivarfs.sh7
-rw-r--r--tools/testing/selftests/exec/execveat.c6
-rw-r--r--tools/testing/selftests/filesystems/Makefile9
-rw-r--r--tools/testing/selftests/filesystems/devpts_pts.c15
-rw-r--r--tools/testing/selftests/firmware/Makefile1
-rwxr-xr-xtools/testing/selftests/firmware/fw_fallback.sh4
-rwxr-xr-xtools/testing/selftests/firmware/fw_filesystem.sh4
-rwxr-xr-xtools/testing/selftests/firmware/fw_lib.sh17
-rwxr-xr-xtools/testing/selftests/firmware/fw_run_tests.sh2
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions7
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc39
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc54
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc58
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc44
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc50
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc50
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc48
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc54
-rw-r--r--tools/testing/selftests/futex/Makefile8
-rwxr-xr-xtools/testing/selftests/gpio/gpio-mockup.sh12
-rw-r--r--tools/testing/selftests/intel_pstate/Makefile5
-rw-r--r--tools/testing/selftests/intel_pstate/aperf.c6
-rwxr-xr-xtools/testing/selftests/intel_pstate/run.sh47
-rw-r--r--tools/testing/selftests/ipc/msgque.c7
-rwxr-xr-xtools/testing/selftests/kmod/kmod.sh13
-rw-r--r--tools/testing/selftests/kselftest.h5
-rw-r--r--tools/testing/selftests/kselftest_harness.h26
-rw-r--r--tools/testing/selftests/kvm/.gitignore3
-rw-r--r--tools/testing/selftests/kvm/Makefile40
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h145
-rw-r--r--tools/testing/selftests/kvm/include/sparsebit.h75
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h46
-rw-r--r--tools/testing/selftests/kvm/include/vmx.h494
-rw-r--r--tools/testing/selftests/kvm/include/x86.h1043
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c92
-rw-r--r--tools/testing/selftests/kvm/lib/elf.c197
-rw-r--r--tools/testing/selftests/kvm/lib/io.c158
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c1486
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h67
-rw-r--r--tools/testing/selftests/kvm/lib/sparsebit.c2087
-rw-r--r--tools/testing/selftests/kvm/lib/vmx.c243
-rw-r--r--tools/testing/selftests/kvm/lib/x86.c700
-rw-r--r--tools/testing/selftests/kvm/set_sregs_test.c54
-rw-r--r--tools/testing/selftests/kvm/sync_regs_test.c254
-rw-r--r--tools/testing/selftests/kvm/vmx_tsc_adjust_test.c233
-rw-r--r--tools/testing/selftests/lib.mk62
-rw-r--r--tools/testing/selftests/lib/Makefile2
-rwxr-xr-xtools/testing/selftests/lib/bitmap.sh8
-rwxr-xr-xtools/testing/selftests/lib/prime_numbers.sh9
-rwxr-xr-xtools/testing/selftests/lib/printf.sh8
-rw-r--r--tools/testing/selftests/locking/Makefile10
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/locking/ww_mutex.sh8
-rw-r--r--tools/testing/selftests/media_tests/Makefile3
-rw-r--r--tools/testing/selftests/media_tests/media_device_open.c8
-rw-r--r--tools/testing/selftests/media_tests/media_device_test.c10
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test.c7
-rw-r--r--tools/testing/selftests/memfd/Makefile6
-rwxr-xr-xtools/testing/selftests/memfd/run_hugetlbfs_test.sh (renamed from tools/testing/selftests/memfd/run_tests.sh)18
-rw-r--r--tools/testing/selftests/memory-hotplug/Makefile5
-rwxr-xr-xtools/testing/selftests/memory-hotplug/mem-on-off-test.sh14
-rw-r--r--tools/testing/selftests/mount/Makefile12
-rwxr-xr-xtools/testing/selftests/mount/run_tests.sh12
-rw-r--r--tools/testing/selftests/mqueue/Makefile12
-rw-r--r--tools/testing/selftests/mqueue/mq_open_tests.c37
-rw-r--r--tools/testing/selftests/mqueue/mq_perf_tests.c8
-rw-r--r--tools/testing/selftests/net/Makefile1
-rw-r--r--tools/testing/selftests/net/config5
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh8
-rwxr-xr-xtools/testing/selftests/net/netdevice.sh16
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh5
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c4
-rw-r--r--tools/testing/selftests/net/reuseport_bpf_numa.c4
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh31
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/Makefile7
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/exec_target.c13
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/fork.c325
-rw-r--r--tools/testing/selftests/powerpc/copyloops/Makefile4
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-sigreturn.c92
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-unavailable.c24
-rw-r--r--tools/testing/selftests/proc/.gitignore8
-rw-r--r--tools/testing/selftests/proc/Makefile13
-rw-r--r--tools/testing/selftests/proc/config1
-rw-r--r--tools/testing/selftests/proc/proc-loadavg-001.c83
-rw-r--r--tools/testing/selftests/proc/proc-self-map-files-001.c82
-rw-r--r--tools/testing/selftests/proc/proc-self-map-files-002.c85
-rw-r--r--tools/testing/selftests/proc/proc-self-syscall.c60
-rw-r--r--tools/testing/selftests/proc/proc-self-wchan.c40
-rw-r--r--tools/testing/selftests/proc/proc-uptime-001.c45
-rw-r--r--tools/testing/selftests/proc/proc-uptime-002.c79
-rw-r--r--tools/testing/selftests/proc/proc-uptime.h74
-rw-r--r--tools/testing/selftests/proc/read.c147
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh56
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh12
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh1
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh115
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-torture.sh105
-rw-r--r--tools/testing/selftests/rtc/.gitignore2
-rw-r--r--tools/testing/selftests/rtc/Makefile9
-rw-r--r--tools/testing/selftests/rtc/rtctest.c238
-rw-r--r--tools/testing/selftests/rtc/setdate.c (renamed from tools/testing/selftests/timers/rtctest_setdate.c)0
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c37
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json11
-rw-r--r--tools/testing/selftests/timers/.gitignore3
-rw-r--r--tools/testing/selftests/timers/Makefile4
-rw-r--r--tools/testing/selftests/timers/rtcpie.c134
-rw-r--r--tools/testing/selftests/timers/rtctest.c403
-rw-r--r--tools/testing/selftests/x86/Makefile10
-rw-r--r--tools/testing/selftests/x86/mov_ss_trap.c285
-rw-r--r--tools/testing/selftests/x86/mpx-mini-test.c7
-rw-r--r--tools/testing/selftests/x86/pkey-helpers.h20
-rw-r--r--tools/testing/selftests/x86/protection_keys.c254
-rw-r--r--tools/testing/selftests/x86/test_syscall_vdso.c35
-rw-r--r--tools/testing/selftests/x86/trivial_program.c10
139 files changed, 12222 insertions, 887 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index dbda89c9d9b9..305130de910c 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -3,6 +3,7 @@ TARGETS = android
TARGETS += bpf
TARGETS += breakpoints
TARGETS += capabilities
+TARGETS += cgroup
TARGETS += cpufreq
TARGETS += cpu-hotplug
TARGETS += efivarfs
@@ -15,6 +16,7 @@ TARGETS += gpio
TARGETS += intel_pstate
TARGETS += ipc
TARGETS += kcmp
+TARGETS += kvm
TARGETS += lib
TARGETS += membarrier
TARGETS += memfd
@@ -24,8 +26,10 @@ TARGETS += mqueue
TARGETS += net
TARGETS += nsfs
TARGETS += powerpc
+TARGETS += proc
TARGETS += pstore
TARGETS += ptrace
+TARGETS += rtc
TARGETS += seccomp
TARGETS += sigaltstack
TARGETS += size
@@ -67,6 +71,12 @@ ifndef BUILD
BUILD := $(shell pwd)
endif
+# KSFT_TAP_LEVEL is used from KSFT framework to prevent nested TAP header
+# printing from tests. Applicable to run_tests case where run_tests adds
+# TAP header prior running tests and when a test program invokes another
+# with system() call. Export it here to cover override RUN_TESTS defines.
+export KSFT_TAP_LEVEL=`echo 1`
+
export BUILD
all:
@for TARGET in $(TARGETS); do \
@@ -126,11 +136,15 @@ ifdef INSTALL_PATH
echo "else" >> $(ALL_SCRIPT)
echo " OUTPUT=/dev/stdout" >> $(ALL_SCRIPT)
echo "fi" >> $(ALL_SCRIPT)
+ echo "export KSFT_TAP_LEVEL=1" >> $(ALL_SCRIPT)
+ echo "export skip=4" >> $(ALL_SCRIPT)
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
+ echo "echo ; echo TAP version 13" >> $(ALL_SCRIPT); \
+ echo "echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
echo "echo ========================================" >> $(ALL_SCRIPT); \
+ echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
echo "cd $$TARGET" >> $(ALL_SCRIPT); \
make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
index f6304d2be90c..72c25a3cb658 100644
--- a/tools/testing/selftests/android/Makefile
+++ b/tools/testing/selftests/android/Makefile
@@ -18,10 +18,6 @@ all:
fi \
done
-override define RUN_TESTS
- @cd $(OUTPUT); ./run.sh
-endef
-
override define INSTALL_RULE
mkdir -p $(INSTALL_PATH)
install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
@@ -33,10 +29,6 @@ override define INSTALL_RULE
done;
endef
-override define EMIT_TESTS
- echo "./run.sh"
-endef
-
override define CLEAN
@for DIR in $(SUBDIRS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
diff --git a/tools/testing/selftests/android/ion/.gitignore b/tools/testing/selftests/android/ion/.gitignore
index 67e6f391b2a9..95e8f4561474 100644
--- a/tools/testing/selftests/android/ion/.gitignore
+++ b/tools/testing/selftests/android/ion/.gitignore
@@ -1,2 +1,3 @@
ionapp_export
ionapp_import
+ionmap_test
diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile
index 96e0c448b39d..e03695287f76 100644
--- a/tools/testing/selftests/android/ion/Makefile
+++ b/tools/testing/selftests/android/ion/Makefile
@@ -1,8 +1,8 @@
-INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/
+INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/ -I../../../../../usr/include/
CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
-TEST_GEN_FILES := ionapp_export ionapp_import
+TEST_GEN_FILES := ionapp_export ionapp_import ionmap_test
all: $(TEST_GEN_FILES)
@@ -14,3 +14,4 @@ include ../../lib.mk
$(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c
$(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c
+$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c
diff --git a/tools/testing/selftests/android/ion/config b/tools/testing/selftests/android/ion/config
index 19db6ca9aa2b..b4ad748a9dd9 100644
--- a/tools/testing/selftests/android/ion/config
+++ b/tools/testing/selftests/android/ion/config
@@ -2,3 +2,4 @@ CONFIG_ANDROID=y
CONFIG_STAGING=y
CONFIG_ION=y
CONFIG_ION_SYSTEM_HEAP=y
+CONFIG_DRM_VGEM=y
diff --git a/tools/testing/selftests/android/ion/ion_test.sh b/tools/testing/selftests/android/ion/ion_test.sh
index a1aff506f5e6..69e676cfc94e 100755
--- a/tools/testing/selftests/android/ion/ion_test.sh
+++ b/tools/testing/selftests/android/ion/ion_test.sh
@@ -4,6 +4,9 @@ heapsize=4096
TCID="ion_test.sh"
errcode=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
run_test()
{
heaptype=$1
@@ -25,7 +28,7 @@ check_root()
uid=$(id -u)
if [ $uid -ne 0 ]; then
echo $TCID: must be run as root >&2
- exit 0
+ exit $ksft_skip
fi
}
@@ -35,7 +38,7 @@ check_device()
if [ ! -e $DEVICE ]; then
echo $TCID: No $DEVICE device found >&2
echo $TCID: May be CONFIG_ION is not set >&2
- exit 0
+ exit $ksft_skip
fi
}
diff --git a/tools/testing/selftests/android/ion/ionmap_test.c b/tools/testing/selftests/android/ion/ionmap_test.c
new file mode 100644
index 000000000000..dab36b06b37d
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionmap_test.c
@@ -0,0 +1,136 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/dma-buf.h>
+
+#include <drm/drm.h>
+
+#include "ion.h"
+#include "ionutils.h"
+
+int check_vgem(int fd)
+{
+ drm_version_t version = { 0 };
+ char name[5];
+ int ret;
+
+ version.name_len = 4;
+ version.name = name;
+
+ ret = ioctl(fd, DRM_IOCTL_VERSION, &version);
+ if (ret)
+ return 1;
+
+ return strcmp(name, "vgem");
+}
+
+int open_vgem(void)
+{
+ int i, fd;
+ const char *drmstr = "/dev/dri/card";
+
+ fd = -1;
+ for (i = 0; i < 16; i++) {
+ char name[80];
+
+ sprintf(name, "%s%u", drmstr, i);
+
+ fd = open(name, O_RDWR);
+ if (fd < 0)
+ continue;
+
+ if (check_vgem(fd)) {
+ close(fd);
+ continue;
+ } else {
+ break;
+ }
+
+ }
+ return fd;
+}
+
+int import_vgem_fd(int vgem_fd, int dma_buf_fd, uint32_t *handle)
+{
+ struct drm_prime_handle import_handle = { 0 };
+ int ret;
+
+ import_handle.fd = dma_buf_fd;
+ import_handle.flags = 0;
+ import_handle.handle = 0;
+
+ ret = ioctl(vgem_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &import_handle);
+ if (ret == 0)
+ *handle = import_handle.handle;
+ return ret;
+}
+
+void close_handle(int vgem_fd, uint32_t handle)
+{
+ struct drm_gem_close close = { 0 };
+
+ close.handle = handle;
+ ioctl(vgem_fd, DRM_IOCTL_GEM_CLOSE, &close);
+}
+
+int main()
+{
+ int ret, vgem_fd;
+ struct ion_buffer_info info;
+ uint32_t handle = 0;
+ struct dma_buf_sync sync = { 0 };
+
+ info.heap_type = ION_HEAP_TYPE_SYSTEM;
+ info.heap_size = 4096;
+ info.flag_type = ION_FLAG_CACHED;
+
+ ret = ion_export_buffer_fd(&info);
+ if (ret < 0) {
+ printf("ion buffer alloc failed\n");
+ return -1;
+ }
+
+ vgem_fd = open_vgem();
+ if (vgem_fd < 0) {
+ ret = vgem_fd;
+ printf("Failed to open vgem\n");
+ goto out_ion;
+ }
+
+ ret = import_vgem_fd(vgem_fd, info.buffd, &handle);
+
+ if (ret < 0) {
+ printf("Failed to import buffer\n");
+ goto out_vgem;
+ }
+
+ sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW;
+ ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
+ if (ret)
+ printf("sync start failed %d\n", errno);
+
+ memset(info.buffer, 0xff, 4096);
+
+ sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
+ ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
+ if (ret)
+ printf("sync end failed %d\n", errno);
+
+ close_handle(vgem_fd, handle);
+ ret = 0;
+
+out_vgem:
+ close(vgem_fd);
+out_ion:
+ ion_close_buffer_fd(&info);
+ printf("done.\n");
+ return ret;
+}
diff --git a/tools/testing/selftests/android/ion/ionutils.c b/tools/testing/selftests/android/ion/ionutils.c
index ce69c14f51fa..7d1d37c4ef6a 100644
--- a/tools/testing/selftests/android/ion/ionutils.c
+++ b/tools/testing/selftests/android/ion/ionutils.c
@@ -80,11 +80,6 @@ int ion_export_buffer_fd(struct ion_buffer_info *ion_info)
heap_id = MAX_HEAP_COUNT + 1;
for (i = 0; i < query.cnt; i++) {
if (heap_data[i].type == ion_info->heap_type) {
- printf("--------------------------------------\n");
- printf("heap type: %d\n", heap_data[i].type);
- printf(" heap id: %d\n", heap_data[i].heap_id);
- printf("heap name: %s\n", heap_data[i].name);
- printf("--------------------------------------\n");
heap_id = heap_data[i].heap_id;
break;
}
@@ -204,7 +199,6 @@ void ion_close_buffer_fd(struct ion_buffer_info *ion_info)
/* Finally, close the client fd */
if (ion_info->ionfd > 0)
close(ion_info->ionfd);
- printf("<%s>: buffer release successfully....\n", __func__);
}
}
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 9cf83f895d98..5e1ab2f0eb79 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -12,3 +12,6 @@ test_tcpbpf_user
test_verifier_log
feature
test_libbpf_open
+test_sock
+test_sock_addr
+urandom_read
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 983dd25d49f4..1eefe211a4a8 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -5,3 +5,5 @@ CONFIG_BPF_EVENTS=y
CONFIG_TEST_BPF=m
CONFIG_CGROUP_BPF=y
CONFIG_NETDEVSIM=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_SCH_INGRESS=y
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index faadbe233966..4123d0ab90ba 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1108,7 +1108,7 @@ static void test_stacktrace_build_id(void)
assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
== 0);
- assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0);
+ assert(system("./urandom_read") == 0);
/* disable stack trace collection */
key = 0;
val = 1;
@@ -1158,7 +1158,7 @@ static void test_stacktrace_build_id(void)
} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
CHECK(build_id_matches < 1, "build id match",
- "Didn't find expected build ID from the map");
+ "Didn't find expected build ID from the map\n");
disable_pmu:
ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index 73bb20cfb9b7..f4d99fabc56d 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -13,6 +13,7 @@
#include <bpf/bpf.h>
#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
#ifndef ARRAY_SIZE
# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index d488f20926e8..2950f80ba7fb 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -15,6 +15,7 @@
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
#define CG_PATH "/foo"
#define CONNECT4_PROG_PATH "./connect4_prog.o"
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
index c6e1dcf992c4..9832a875a828 100755
--- a/tools/testing/selftests/bpf/test_sock_addr.sh
+++ b/tools/testing/selftests/bpf/test_sock_addr.sh
@@ -4,7 +4,7 @@ set -eu
ping_once()
{
- ping -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1
+ ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
}
wait_for_ip()
@@ -13,7 +13,7 @@ wait_for_ip()
echo -n "Wait for testing IPv4/IPv6 to become available "
for _i in $(seq ${MAX_PING_TRIES}); do
echo -n "."
- if ping_once ${TEST_IPv4} && ping_once ${TEST_IPv6}; then
+ if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then
echo " OK"
return
fi
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3e7718b1a9ae..fd7de7eb329e 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -11713,6 +11713,11 @@ static void get_unpriv_disabled()
FILE *fd;
fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
+ if (!fd) {
+ perror("fopen /proc/sys/"UNPRIV_SYSCTL);
+ unpriv_disabled = true;
+ return;
+ }
if (fgets(buf, 2, fd) == buf && atoi(buf))
unpriv_disabled = true;
fclose(fd);
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
index 3fece06e9f64..f82dcc1f8841 100644
--- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -143,10 +143,14 @@ void suspend(void)
int err;
struct itimerspec spec = {};
+ if (getuid() != 0)
+ ksft_exit_skip("Please run the test as root - Exiting.\n");
+
power_state_fd = open("/sys/power/state", O_RDWR);
if (power_state_fd < 0)
ksft_exit_fail_msg(
- "open(\"/sys/power/state\") failed (is this test running as root?)\n");
+ "open(\"/sys/power/state\") failed %s)\n",
+ strerror(errno));
timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
if (timerfd < 0)
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
new file mode 100644
index 000000000000..f7a31392eb2f
--- /dev/null
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wall
+
+all:
+
+TEST_GEN_PROGS = test_memcontrol
+
+include ../lib.mk
+
+$(OUTPUT)/test_memcontrol: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
new file mode 100644
index 000000000000..b69bdeb4b9fe
--- /dev/null
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -0,0 +1,331 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "cgroup_util.h"
+
+static ssize_t read_text(const char *path, char *buf, size_t max_len)
+{
+ ssize_t len;
+ int fd;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ len = read(fd, buf, max_len - 1);
+ if (len < 0)
+ goto out;
+
+ buf[len] = 0;
+out:
+ close(fd);
+ return len;
+}
+
+static ssize_t write_text(const char *path, char *buf, size_t len)
+{
+ int fd;
+
+ fd = open(path, O_WRONLY | O_APPEND);
+ if (fd < 0)
+ return fd;
+
+ len = write(fd, buf, len);
+ if (len < 0) {
+ close(fd);
+ return len;
+ }
+
+ close(fd);
+
+ return len;
+}
+
+char *cg_name(const char *root, const char *name)
+{
+ size_t len = strlen(root) + strlen(name) + 2;
+ char *ret = malloc(len);
+
+ snprintf(ret, len, "%s/%s", root, name);
+
+ return ret;
+}
+
+char *cg_name_indexed(const char *root, const char *name, int index)
+{
+ size_t len = strlen(root) + strlen(name) + 10;
+ char *ret = malloc(len);
+
+ snprintf(ret, len, "%s/%s_%d", root, name, index);
+
+ return ret;
+}
+
+int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
+{
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), "%s/%s", cgroup, control);
+
+ if (read_text(path, buf, len) >= 0)
+ return 0;
+
+ return -1;
+}
+
+int cg_read_strcmp(const char *cgroup, const char *control,
+ const char *expected)
+{
+ size_t size = strlen(expected) + 1;
+ char *buf;
+
+ buf = malloc(size);
+ if (!buf)
+ return -1;
+
+ if (cg_read(cgroup, control, buf, size))
+ return -1;
+
+ return strcmp(expected, buf);
+}
+
+int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
+{
+ char buf[PAGE_SIZE];
+
+ if (cg_read(cgroup, control, buf, sizeof(buf)))
+ return -1;
+
+ return strstr(buf, needle) ? 0 : -1;
+}
+
+long cg_read_long(const char *cgroup, const char *control)
+{
+ char buf[128];
+
+ if (cg_read(cgroup, control, buf, sizeof(buf)))
+ return -1;
+
+ return atol(buf);
+}
+
+long cg_read_key_long(const char *cgroup, const char *control, const char *key)
+{
+ char buf[PAGE_SIZE];
+ char *ptr;
+
+ if (cg_read(cgroup, control, buf, sizeof(buf)))
+ return -1;
+
+ ptr = strstr(buf, key);
+ if (!ptr)
+ return -1;
+
+ return atol(ptr + strlen(key));
+}
+
+int cg_write(const char *cgroup, const char *control, char *buf)
+{
+ char path[PATH_MAX];
+ size_t len = strlen(buf);
+
+ snprintf(path, sizeof(path), "%s/%s", cgroup, control);
+
+ if (write_text(path, buf, len) == len)
+ return 0;
+
+ return -1;
+}
+
+int cg_find_unified_root(char *root, size_t len)
+{
+ char buf[10 * PAGE_SIZE];
+ char *fs, *mount, *type;
+ const char delim[] = "\n\t ";
+
+ if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
+ return -1;
+
+ /*
+ * Example:
+ * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
+ */
+ for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
+ mount = strtok(NULL, delim);
+ type = strtok(NULL, delim);
+ strtok(NULL, delim);
+ strtok(NULL, delim);
+ strtok(NULL, delim);
+
+ if (strcmp(fs, "cgroup") == 0 &&
+ strcmp(type, "cgroup2") == 0) {
+ strncpy(root, mount, len);
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+int cg_create(const char *cgroup)
+{
+ return mkdir(cgroup, 0644);
+}
+
+static int cg_killall(const char *cgroup)
+{
+ char buf[PAGE_SIZE];
+ char *ptr = buf;
+
+ if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
+ return -1;
+
+ while (ptr < buf + sizeof(buf)) {
+ int pid = strtol(ptr, &ptr, 10);
+
+ if (pid == 0)
+ break;
+ if (*ptr)
+ ptr++;
+ else
+ break;
+ if (kill(pid, SIGKILL))
+ return -1;
+ }
+
+ return 0;
+}
+
+int cg_destroy(const char *cgroup)
+{
+ int ret;
+
+retry:
+ ret = rmdir(cgroup);
+ if (ret && errno == EBUSY) {
+ ret = cg_killall(cgroup);
+ if (ret)
+ return ret;
+ usleep(100);
+ goto retry;
+ }
+
+ if (ret && errno == ENOENT)
+ ret = 0;
+
+ return ret;
+}
+
+int cg_run(const char *cgroup,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg)
+{
+ int pid, retcode;
+
+ pid = fork();
+ if (pid < 0) {
+ return pid;
+ } else if (pid == 0) {
+ char buf[64];
+
+ snprintf(buf, sizeof(buf), "%d", getpid());
+ if (cg_write(cgroup, "cgroup.procs", buf))
+ exit(EXIT_FAILURE);
+ exit(fn(cgroup, arg));
+ } else {
+ waitpid(pid, &retcode, 0);
+ if (WIFEXITED(retcode))
+ return WEXITSTATUS(retcode);
+ else
+ return -1;
+ }
+}
+
+int cg_run_nowait(const char *cgroup,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg)
+{
+ int pid;
+
+ pid = fork();
+ if (pid == 0) {
+ char buf[64];
+
+ snprintf(buf, sizeof(buf), "%d", getpid());
+ if (cg_write(cgroup, "cgroup.procs", buf))
+ exit(EXIT_FAILURE);
+ exit(fn(cgroup, arg));
+ }
+
+ return pid;
+}
+
+int get_temp_fd(void)
+{
+ return open(".", O_TMPFILE | O_RDWR | O_EXCL);
+}
+
+int alloc_pagecache(int fd, size_t size)
+{
+ char buf[PAGE_SIZE];
+ struct stat st;
+ int i;
+
+ if (fstat(fd, &st))
+ goto cleanup;
+
+ size += st.st_size;
+
+ if (ftruncate(fd, size))
+ goto cleanup;
+
+ for (i = 0; i < size; i += sizeof(buf))
+ read(fd, buf, sizeof(buf));
+
+ return 0;
+
+cleanup:
+ return -1;
+}
+
+int alloc_anon(const char *cgroup, void *arg)
+{
+ size_t size = (unsigned long)arg;
+ char *buf, *ptr;
+
+ buf = malloc(size);
+ for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ *ptr = 0;
+
+ free(buf);
+ return 0;
+}
+
+int is_swap_enabled(void)
+{
+ char buf[PAGE_SIZE];
+ const char delim[] = "\n";
+ int cnt = 0;
+ char *line;
+
+ if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
+ return -1;
+
+ for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
+ cnt++;
+
+ return cnt > 1;
+}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
new file mode 100644
index 000000000000..fe82a297d4e0
--- /dev/null
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdlib.h>
+
+#define PAGE_SIZE 4096
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define MB(x) (x << 20)
+
+/*
+ * Checks if two given values differ by less than err% of their sum.
+ */
+static inline int values_close(long a, long b, int err)
+{
+ return abs(a - b) <= (a + b) / 100 * err;
+}
+
+extern int cg_find_unified_root(char *root, size_t len);
+extern char *cg_name(const char *root, const char *name);
+extern char *cg_name_indexed(const char *root, const char *name, int index);
+extern int cg_create(const char *cgroup);
+extern int cg_destroy(const char *cgroup);
+extern int cg_read(const char *cgroup, const char *control,
+ char *buf, size_t len);
+extern int cg_read_strcmp(const char *cgroup, const char *control,
+ const char *expected);
+extern int cg_read_strstr(const char *cgroup, const char *control,
+ const char *needle);
+extern long cg_read_long(const char *cgroup, const char *control);
+long cg_read_key_long(const char *cgroup, const char *control, const char *key);
+extern int cg_write(const char *cgroup, const char *control, char *buf);
+extern int cg_run(const char *cgroup,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg);
+extern int cg_run_nowait(const char *cgroup,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg);
+extern int get_temp_fd(void);
+extern int alloc_pagecache(int fd, size_t size);
+extern int alloc_anon(const char *cgroup, void *arg);
+extern int is_swap_enabled(void);
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
new file mode 100644
index 000000000000..cf0bddc9d271
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -0,0 +1,1015 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+/*
+ * This test creates two nested cgroups with and without enabling
+ * the memory controller.
+ */
+static int test_memcg_subtree_control(const char *root)
+{
+ char *parent, *child, *parent2, *child2;
+ int ret = KSFT_FAIL;
+ char buf[PAGE_SIZE];
+
+ /* Create two nested cgroups with the memory controller enabled */
+ parent = cg_name(root, "memcg_test_0");
+ child = cg_name(root, "memcg_test_0/memcg_test_1");
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_read_strstr(child, "cgroup.controllers", "memory"))
+ goto cleanup;
+
+ /* Create two nested cgroups without enabling memory controller */
+ parent2 = cg_name(root, "memcg_test_1");
+ child2 = cg_name(root, "memcg_test_1/memcg_test_1");
+ if (!parent2 || !child2)
+ goto cleanup;
+
+ if (cg_create(parent2))
+ goto cleanup;
+
+ if (cg_create(child2))
+ goto cleanup;
+
+ if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
+ goto cleanup;
+
+ if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(child);
+ cg_destroy(parent);
+ free(parent);
+ free(child);
+
+ cg_destroy(child2);
+ cg_destroy(parent2);
+ free(parent2);
+ free(child2);
+
+ return ret;
+}
+
+static int alloc_anon_50M_check(const char *cgroup, void *arg)
+{
+ size_t size = MB(50);
+ char *buf, *ptr;
+ long anon, current;
+ int ret = -1;
+
+ buf = malloc(size);
+ for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ *ptr = 0;
+
+ current = cg_read_long(cgroup, "memory.current");
+ if (current < size)
+ goto cleanup;
+
+ if (!values_close(size, current, 3))
+ goto cleanup;
+
+ anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
+ if (anon < 0)
+ goto cleanup;
+
+ if (!values_close(anon, current, 3))
+ goto cleanup;
+
+ ret = 0;
+cleanup:
+ free(buf);
+ return ret;
+}
+
+static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
+{
+ size_t size = MB(50);
+ int ret = -1;
+ long current, file;
+ int fd;
+
+ fd = get_temp_fd();
+ if (fd < 0)
+ return -1;
+
+ if (alloc_pagecache(fd, size))
+ goto cleanup;
+
+ current = cg_read_long(cgroup, "memory.current");
+ if (current < size)
+ goto cleanup;
+
+ file = cg_read_key_long(cgroup, "memory.stat", "file ");
+ if (file < 0)
+ goto cleanup;
+
+ if (!values_close(file, current, 10))
+ goto cleanup;
+
+ ret = 0;
+
+cleanup:
+ close(fd);
+ return ret;
+}
+
+/*
+ * This test create a memory cgroup, allocates
+ * some anonymous memory and some pagecache
+ * and check memory.current and some memory.stat values.
+ */
+static int test_memcg_current(const char *root)
+{
+ int ret = KSFT_FAIL;
+ long current;
+ char *memcg;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ current = cg_read_long(memcg, "memory.current");
+ if (current != 0)
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_anon_50M_check, NULL))
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+static int alloc_pagecache_50M(const char *cgroup, void *arg)
+{
+ int fd = (long)arg;
+
+ return alloc_pagecache(fd, MB(50));
+}
+
+static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
+{
+ int fd = (long)arg;
+ int ppid = getppid();
+
+ if (alloc_pagecache(fd, MB(50)))
+ return -1;
+
+ while (getppid() == ppid)
+ sleep(1);
+
+ return 0;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A memory.min = 50M, memory.max = 200M
+ * A/B memory.min = 50M, memory.current = 50M
+ * A/B/C memory.min = 75M, memory.current = 50M
+ * A/B/D memory.min = 25M, memory.current = 50M
+ * A/B/E memory.min = 500M, memory.current = 0
+ * A/B/F memory.min = 0, memory.current = 50M
+ *
+ * Usages are pagecache, but the test keeps a running
+ * process in every leaf cgroup.
+ * Then it creates A/G and creates a significant
+ * memory pressure in it.
+ *
+ * A/B memory.current ~= 50M
+ * A/B/C memory.current ~= 33M
+ * A/B/D memory.current ~= 17M
+ * A/B/E memory.current ~= 0
+ *
+ * After that it tries to allocate more than there is
+ * unprotected memory in A available, and checks
+ * checks that memory.min protects pagecache even
+ * in this case.
+ */
+static int test_memcg_min(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent[3] = {NULL};
+ char *children[4] = {NULL};
+ long c[4];
+ int i, attempts;
+ int fd;
+
+ fd = get_temp_fd();
+ if (fd < 0)
+ goto cleanup;
+
+ parent[0] = cg_name(root, "memcg_test_0");
+ if (!parent[0])
+ goto cleanup;
+
+ parent[1] = cg_name(parent[0], "memcg_test_1");
+ if (!parent[1])
+ goto cleanup;
+
+ parent[2] = cg_name(parent[0], "memcg_test_2");
+ if (!parent[2])
+ goto cleanup;
+
+ if (cg_create(parent[0]))
+ goto cleanup;
+
+ if (cg_read_long(parent[0], "memory.min")) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_write(parent[0], "memory.max", "200M"))
+ goto cleanup;
+
+ if (cg_write(parent[0], "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_create(parent[1]))
+ goto cleanup;
+
+ if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_create(parent[2]))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ children[i] = cg_name_indexed(parent[1], "child_memcg", i);
+ if (!children[i])
+ goto cleanup;
+
+ if (cg_create(children[i]))
+ goto cleanup;
+
+ if (i == 2)
+ continue;
+
+ cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
+ (void *)(long)fd);
+ }
+
+ if (cg_write(parent[0], "memory.min", "50M"))
+ goto cleanup;
+ if (cg_write(parent[1], "memory.min", "50M"))
+ goto cleanup;
+ if (cg_write(children[0], "memory.min", "75M"))
+ goto cleanup;
+ if (cg_write(children[1], "memory.min", "25M"))
+ goto cleanup;
+ if (cg_write(children[2], "memory.min", "500M"))
+ goto cleanup;
+ if (cg_write(children[3], "memory.min", "0"))
+ goto cleanup;
+
+ attempts = 0;
+ while (!values_close(cg_read_long(parent[1], "memory.current"),
+ MB(150), 3)) {
+ if (attempts++ > 5)
+ break;
+ sleep(1);
+ }
+
+ if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
+ goto cleanup;
+
+ if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(children); i++)
+ c[i] = cg_read_long(children[i], "memory.current");
+
+ if (!values_close(c[0], MB(33), 10))
+ goto cleanup;
+
+ if (!values_close(c[1], MB(17), 10))
+ goto cleanup;
+
+ if (!values_close(c[2], 0, 1))
+ goto cleanup;
+
+ if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
+ goto cleanup;
+
+ if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
+ if (!children[i])
+ continue;
+
+ cg_destroy(children[i]);
+ free(children[i]);
+ }
+
+ for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
+ if (!parent[i])
+ continue;
+
+ cg_destroy(parent[i]);
+ free(parent[i]);
+ }
+ close(fd);
+ return ret;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A memory.low = 50M, memory.max = 200M
+ * A/B memory.low = 50M, memory.current = 50M
+ * A/B/C memory.low = 75M, memory.current = 50M
+ * A/B/D memory.low = 25M, memory.current = 50M
+ * A/B/E memory.low = 500M, memory.current = 0
+ * A/B/F memory.low = 0, memory.current = 50M
+ *
+ * Usages are pagecache.
+ * Then it creates A/G an creates a significant
+ * memory pressure in it.
+ *
+ * Then it checks actual memory usages and expects that:
+ * A/B memory.current ~= 50M
+ * A/B/ memory.current ~= 33M
+ * A/B/D memory.current ~= 17M
+ * A/B/E memory.current ~= 0
+ *
+ * After that it tries to allocate more than there is
+ * unprotected memory in A available,
+ * and checks low and oom events in memory.events.
+ */
+static int test_memcg_low(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent[3] = {NULL};
+ char *children[4] = {NULL};
+ long low, oom;
+ long c[4];
+ int i;
+ int fd;
+
+ fd = get_temp_fd();
+ if (fd < 0)
+ goto cleanup;
+
+ parent[0] = cg_name(root, "memcg_test_0");
+ if (!parent[0])
+ goto cleanup;
+
+ parent[1] = cg_name(parent[0], "memcg_test_1");
+ if (!parent[1])
+ goto cleanup;
+
+ parent[2] = cg_name(parent[0], "memcg_test_2");
+ if (!parent[2])
+ goto cleanup;
+
+ if (cg_create(parent[0]))
+ goto cleanup;
+
+ if (cg_read_long(parent[0], "memory.low"))
+ goto cleanup;
+
+ if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_write(parent[0], "memory.max", "200M"))
+ goto cleanup;
+
+ if (cg_write(parent[0], "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_create(parent[1]))
+ goto cleanup;
+
+ if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_create(parent[2]))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ children[i] = cg_name_indexed(parent[1], "child_memcg", i);
+ if (!children[i])
+ goto cleanup;
+
+ if (cg_create(children[i]))
+ goto cleanup;
+
+ if (i == 2)
+ continue;
+
+ if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
+ goto cleanup;
+ }
+
+ if (cg_write(parent[0], "memory.low", "50M"))
+ goto cleanup;
+ if (cg_write(parent[1], "memory.low", "50M"))
+ goto cleanup;
+ if (cg_write(children[0], "memory.low", "75M"))
+ goto cleanup;
+ if (cg_write(children[1], "memory.low", "25M"))
+ goto cleanup;
+ if (cg_write(children[2], "memory.low", "500M"))
+ goto cleanup;
+ if (cg_write(children[3], "memory.low", "0"))
+ goto cleanup;
+
+ if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
+ goto cleanup;
+
+ if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(children); i++)
+ c[i] = cg_read_long(children[i], "memory.current");
+
+ if (!values_close(c[0], MB(33), 10))
+ goto cleanup;
+
+ if (!values_close(c[1], MB(17), 10))
+ goto cleanup;
+
+ if (!values_close(c[2], 0, 1))
+ goto cleanup;
+
+ if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
+ fprintf(stderr,
+ "memory.low prevents from allocating anon memory\n");
+ goto cleanup;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ oom = cg_read_key_long(children[i], "memory.events", "oom ");
+ low = cg_read_key_long(children[i], "memory.events", "low ");
+
+ if (oom)
+ goto cleanup;
+ if (i < 2 && low <= 0)
+ goto cleanup;
+ if (i >= 2 && low)
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
+ if (!children[i])
+ continue;
+
+ cg_destroy(children[i]);
+ free(children[i]);
+ }
+
+ for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
+ if (!parent[i])
+ continue;
+
+ cg_destroy(parent[i]);
+ free(parent[i]);
+ }
+ close(fd);
+ return ret;
+}
+
+static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
+{
+ size_t size = MB(50);
+ int ret = -1;
+ long current;
+ int fd;
+
+ fd = get_temp_fd();
+ if (fd < 0)
+ return -1;
+
+ if (alloc_pagecache(fd, size))
+ goto cleanup;
+
+ current = cg_read_long(cgroup, "memory.current");
+ if (current <= MB(29) || current > MB(30))
+ goto cleanup;
+
+ ret = 0;
+
+cleanup:
+ close(fd);
+ return ret;
+
+}
+
+/*
+ * This test checks that memory.high limits the amount of
+ * memory which can be consumed by either anonymous memory
+ * or pagecache.
+ */
+static int test_memcg_high(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *memcg;
+ long high;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ if (cg_read_strcmp(memcg, "memory.high", "max\n"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.high", "30M"))
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
+ goto cleanup;
+
+ high = cg_read_key_long(memcg, "memory.events", "high ");
+ if (high <= 0)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+/*
+ * This test checks that memory.max limits the amount of
+ * memory which can be consumed by either anonymous memory
+ * or pagecache.
+ */
+static int test_memcg_max(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *memcg;
+ long current, max;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ if (cg_read_strcmp(memcg, "memory.max", "max\n"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "30M"))
+ goto cleanup;
+
+ /* Should be killed by OOM killer */
+ if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
+ goto cleanup;
+
+ current = cg_read_long(memcg, "memory.current");
+ if (current > MB(30) || !current)
+ goto cleanup;
+
+ max = cg_read_key_long(memcg, "memory.events", "max ");
+ if (max <= 0)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
+{
+ long mem_max = (long)arg;
+ size_t size = MB(50);
+ char *buf, *ptr;
+ long mem_current, swap_current;
+ int ret = -1;
+
+ buf = malloc(size);
+ for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ *ptr = 0;
+
+ mem_current = cg_read_long(cgroup, "memory.current");
+ if (!mem_current || !values_close(mem_current, mem_max, 3))
+ goto cleanup;
+
+ swap_current = cg_read_long(cgroup, "memory.swap.current");
+ if (!swap_current ||
+ !values_close(mem_current + swap_current, size, 3))
+ goto cleanup;
+
+ ret = 0;
+cleanup:
+ free(buf);
+ return ret;
+}
+
+/*
+ * This test checks that memory.swap.max limits the amount of
+ * anonymous memory which can be swapped out.
+ */
+static int test_memcg_swap_max(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *memcg;
+ long max;
+
+ if (!is_swap_enabled())
+ return KSFT_SKIP;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ if (cg_read_long(memcg, "memory.swap.current")) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_read_strcmp(memcg, "memory.max", "max\n"))
+ goto cleanup;
+
+ if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "30M"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "30M"))
+ goto cleanup;
+
+ /* Should be killed by OOM killer */
+ if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
+ goto cleanup;
+
+ max = cg_read_key_long(memcg, "memory.events", "max ");
+ if (max <= 0)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM. Then it checks for oom and oom_kill events in
+ * memory.events.
+ */
+static int test_memcg_oom_events(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *memcg;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "30M"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_read_strcmp(memcg, "cgroup.procs", ""))
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+struct tcp_server_args {
+ unsigned short port;
+ int ctl[2];
+};
+
+static int tcp_server(const char *cgroup, void *arg)
+{
+ struct tcp_server_args *srv_args = arg;
+ struct sockaddr_in6 saddr = { 0 };
+ socklen_t slen = sizeof(saddr);
+ int sk, client_sk, ctl_fd, yes = 1, ret = -1;
+
+ close(srv_args->ctl[0]);
+ ctl_fd = srv_args->ctl[1];
+
+ saddr.sin6_family = AF_INET6;
+ saddr.sin6_addr = in6addr_any;
+ saddr.sin6_port = htons(srv_args->port);
+
+ sk = socket(AF_INET6, SOCK_STREAM, 0);
+ if (sk < 0)
+ return ret;
+
+ if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
+ goto cleanup;
+
+ if (bind(sk, (struct sockaddr *)&saddr, slen)) {
+ write(ctl_fd, &errno, sizeof(errno));
+ goto cleanup;
+ }
+
+ if (listen(sk, 1))
+ goto cleanup;
+
+ ret = 0;
+ if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
+ ret = -1;
+ goto cleanup;
+ }
+
+ client_sk = accept(sk, NULL, NULL);
+ if (client_sk < 0)
+ goto cleanup;
+
+ ret = -1;
+ for (;;) {
+ uint8_t buf[0x100000];
+
+ if (write(client_sk, buf, sizeof(buf)) <= 0) {
+ if (errno == ECONNRESET)
+ ret = 0;
+ break;
+ }
+ }
+
+ close(client_sk);
+
+cleanup:
+ close(sk);
+ return ret;
+}
+
+static int tcp_client(const char *cgroup, unsigned short port)
+{
+ const char server[] = "localhost";
+ struct addrinfo *ai;
+ char servport[6];
+ int retries = 0x10; /* nice round number */
+ int sk, ret;
+
+ snprintf(servport, sizeof(servport), "%hd", port);
+ ret = getaddrinfo(server, servport, NULL, &ai);
+ if (ret)
+ return ret;
+
+ sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+ if (sk < 0)
+ goto free_ainfo;
+
+ ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
+ if (ret < 0)
+ goto close_sk;
+
+ ret = KSFT_FAIL;
+ while (retries--) {
+ uint8_t buf[0x100000];
+ long current, sock;
+
+ if (read(sk, buf, sizeof(buf)) <= 0)
+ goto close_sk;
+
+ current = cg_read_long(cgroup, "memory.current");
+ sock = cg_read_key_long(cgroup, "memory.stat", "sock ");
+
+ if (current < 0 || sock < 0)
+ goto close_sk;
+
+ if (current < sock)
+ goto close_sk;
+
+ if (values_close(current, sock, 10)) {
+ ret = KSFT_PASS;
+ break;
+ }
+ }
+
+close_sk:
+ close(sk);
+free_ainfo:
+ freeaddrinfo(ai);
+ return ret;
+}
+
+/*
+ * This test checks socket memory accounting.
+ * The test forks a TCP server listens on a random port between 1000
+ * and 61000. Once it gets a client connection, it starts writing to
+ * its socket.
+ * The TCP client interleaves reads from the socket with check whether
+ * memory.current and memory.stat.sock are similar.
+ */
+static int test_memcg_sock(const char *root)
+{
+ int bind_retries = 5, ret = KSFT_FAIL, pid, err;
+ unsigned short port;
+ char *memcg;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ while (bind_retries--) {
+ struct tcp_server_args args;
+
+ if (pipe(args.ctl))
+ goto cleanup;
+
+ port = args.port = 1000 + rand() % 60000;
+
+ pid = cg_run_nowait(memcg, tcp_server, &args);
+ if (pid < 0)
+ goto cleanup;
+
+ close(args.ctl[1]);
+ if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
+ goto cleanup;
+ close(args.ctl[0]);
+
+ if (!err)
+ break;
+ if (err != EADDRINUSE)
+ goto cleanup;
+
+ waitpid(pid, NULL, 0);
+ }
+
+ if (err == EADDRINUSE) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (tcp_client(memcg, port) != KSFT_PASS)
+ goto cleanup;
+
+ waitpid(pid, &err, 0);
+ if (WEXITSTATUS(err))
+ goto cleanup;
+
+ if (cg_read_long(memcg, "memory.current") < 0)
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.stat", "sock "))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+#define T(x) { x, #x }
+struct memcg_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_memcg_subtree_control),
+ T(test_memcg_current),
+ T(test_memcg_min),
+ T(test_memcg_low),
+ T(test_memcg_high),
+ T(test_memcg_max),
+ T(test_memcg_oom_events),
+ T(test_memcg_swap_max),
+ T(test_memcg_sock),
+};
+#undef T
+
+int main(int argc, char **argv)
+{
+ char root[PATH_MAX];
+ int i, ret = EXIT_SUCCESS;
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ /*
+ * Check that memory controller is available:
+ * memory is listed in cgroup.controllers
+ */
+ if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+ ksft_exit_skip("memory controller isn't available\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
index f3a8933c1275..bab13dd025a6 100755
--- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
+++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
@@ -2,6 +2,8 @@
# SPDX-License-Identifier: GPL-2.0
SYSFS=
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
prerequisite()
{
@@ -9,7 +11,7 @@ prerequisite()
if [ $UID != 0 ]; then
echo $msg must be run as root >&2
- exit 0
+ exit $ksft_skip
fi
taskset -p 01 $$
@@ -18,12 +20,12 @@ prerequisite()
if [ ! -d "$SYSFS" ]; then
echo $msg sysfs is not mounted >&2
- exit 0
+ exit $ksft_skip
fi
if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then
echo $msg cpu hotplug is not supported >&2
- exit 0
+ exit $ksft_skip
fi
echo "CPU online/offline summary:"
@@ -32,7 +34,7 @@ prerequisite()
if [[ "$online_cpus" = "$online_max" ]]; then
echo "$msg: since there is only one cpu: $online_cpus"
- exit 0
+ exit $ksft_skip
fi
echo -e "\t Cpus in online state: $online_cpus"
@@ -237,12 +239,12 @@ prerequisite_extra()
if [ ! -d "$DEBUGFS" ]; then
echo $msg debugfs is not mounted >&2
- exit 0
+ exit $ksft_skip
fi
if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
echo $msg cpu-notifier-error-inject module is not available >&2
- exit 0
+ exit $ksft_skip
fi
}
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
index d83922de9d89..31f8c9a76c5f 100755
--- a/tools/testing/selftests/cpufreq/main.sh
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -13,6 +13,9 @@ SYSFS=
CPUROOT=
CPUFREQROOT=
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
helpme()
{
printf "Usage: $0 [-h] [-todg args]
@@ -38,7 +41,7 @@ prerequisite()
if [ $UID != 0 ]; then
echo $msg must be run as root >&2
- exit 2
+ exit $ksft_skip
fi
taskset -p 01 $$
diff --git a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
new file mode 100755
index 000000000000..1893d0f59ad7
--- /dev/null
+++ b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
@@ -0,0 +1,198 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+usage() { echo "usbip_test.sh -b <busid> -p <usbip tools path>"; exit 1; }
+
+while getopts "h:b:p:" arg; do
+ case "${arg}" in
+ h)
+ usage
+ ;;
+ b)
+ busid=${OPTARG}
+ ;;
+ p)
+ tools_path=${OPTARG}
+ ;;
+ *)
+ usage
+ ;;
+ esac
+done
+shift $((OPTIND-1))
+
+if [ -z "${busid}" ]; then
+ usage
+fi
+
+echo "Running USB over IP Testing on $busid";
+
+test_end_msg="End of USB over IP Testing on $busid"
+
+if [ $UID != 0 ]; then
+ echo "Please run usbip_test as root [SKIP]"
+ echo $test_end_msg
+ exit $ksft_skip
+fi
+
+echo "Load usbip_host module"
+if ! /sbin/modprobe -q -n usbip_host; then
+ echo "usbip_test: module usbip_host is not found [SKIP]"
+ echo $test_end_msg
+ exit $ksft_skip
+fi
+
+if /sbin/modprobe -q usbip_host; then
+ /sbin/modprobe -q -r test_bitmap
+ echo "usbip_test: module usbip_host is loaded [OK]"
+else
+ echo "usbip_test: module usbip_host failed to load [FAIL]"
+ echo $test_end_msg
+ exit 1
+fi
+
+echo "Load vhci_hcd module"
+if /sbin/modprobe -q vhci_hcd; then
+ /sbin/modprobe -q -r test_bitmap
+ echo "usbip_test: module vhci_hcd is loaded [OK]"
+else
+ echo "usbip_test: module vhci_hcd failed to load [FAIL]"
+ echo $test_end_msg
+ exit 1
+fi
+echo "=============================================================="
+
+cd $tools_path;
+
+if [ ! -f src/usbip ]; then
+ echo "Please build usbip tools"
+ echo $test_end_msg
+ exit $ksft_skip
+fi
+
+echo "Expect to see export-able devices";
+src/usbip list -l;
+echo "=============================================================="
+
+echo "Run lsusb to see all usb devices"
+lsusb -t;
+echo "=============================================================="
+
+src/usbipd -D;
+
+echo "Get exported devices from localhost - expect to see none";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "bind devices";
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - bound devices should be under usbip_host control"
+lsusb -t;
+echo "=============================================================="
+
+echo "bind devices - expect already bound messages"
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Get exported devices from localhost - expect to see exported devices";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "unbind devices";
+src/usbip unbind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - bound devices should be rebound to original drivers"
+lsusb -t;
+echo "=============================================================="
+
+echo "unbind devices - expect no devices bound message";
+src/usbip unbind -b $busid;
+echo "=============================================================="
+
+echo "Get exported devices from localhost - expect to see none";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - should fail with no devices"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+echo "bind devices";
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "List imported devices - expect to see exported devices";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - should work"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+echo "List imported devices - expect to see imported devices";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - expect already imported messages"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+echo "Un-import devices";
+src/usbip detach -p 00;
+src/usbip detach -p 01;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Un-import devices - expect no devices to detach messages";
+src/usbip detach -p 00;
+src/usbip detach -p 01;
+echo "=============================================================="
+
+echo "Detach invalid port tests - expect invalid port error message";
+src/usbip detach -p 100;
+echo "=============================================================="
+
+echo "Expect to see export-able devices";
+src/usbip list -l;
+echo "=============================================================="
+
+echo "Remove usbip_host module";
+rmmod usbip_host;
+
+echo "Run lsusb - bound devices should be rebound to original drivers"
+lsusb -t;
+echo "=============================================================="
+
+echo "Run bind without usbip_host - expect fail"
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - devices that failed to bind aren't bound to any driver"
+lsusb -t;
+echo "=============================================================="
+
+echo "modprobe usbip_host - does it work?"
+/sbin/modprobe usbip_host
+echo "Should see -busid- is not in match_busid table... skip! dmesg"
+echo "=============================================================="
+dmesg | grep "is not in match_busid table"
+echo "=============================================================="
+
+echo $test_end_msg
diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh
index c6d5790575ae..a47029a799d2 100755
--- a/tools/testing/selftests/efivarfs/efivarfs.sh
+++ b/tools/testing/selftests/efivarfs/efivarfs.sh
@@ -4,18 +4,21 @@
efivarfs_mount=/sys/firmware/efi/efivars
test_guid=210be57c-9849-4fc7-a635-e6382d1aec27
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
check_prereqs()
{
local msg="skip all tests:"
if [ $UID != 0 ]; then
echo $msg must be run as root >&2
- exit 0
+ exit $ksft_skip
fi
if ! grep -q "^\S\+ $efivarfs_mount efivarfs" /proc/mounts; then
echo $msg efivarfs is not mounted on $efivarfs_mount >&2
- exit 0
+ exit $ksft_skip
fi
}
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index 67cd4597db2b..47cbf54d0801 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -20,6 +20,8 @@
#include <string.h>
#include <unistd.h>
+#include "../kselftest.h"
+
static char longpath[2 * PATH_MAX] = "";
static char *envp[] = { "IN_TEST=yes", NULL, NULL };
static char *argv[] = { "execveat", "99", NULL };
@@ -249,8 +251,8 @@ static int run_tests(void)
errno = 0;
execveat_(-1, NULL, NULL, NULL, 0);
if (errno == ENOSYS) {
- printf("[FAIL] ENOSYS calling execveat - no kernel support?\n");
- return 1;
+ ksft_exit_skip(
+ "ENOSYS calling execveat - no kernel support?\n");
}
/* Change file position to confirm it doesn't affect anything */
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
index 4e6d09fb166f..129880fb42d3 100644
--- a/tools/testing/selftests/filesystems/Makefile
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -1,8 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := dnotify_test devpts_pts
-all: $(TEST_PROGS)
-include ../lib.mk
+CFLAGS += -I../../../../usr/include/
+TEST_GEN_PROGS := devpts_pts
+TEST_GEN_PROGS_EXTENDED := dnotify_test
-clean:
- rm -fr $(TEST_PROGS)
+include ../lib.mk
diff --git a/tools/testing/selftests/filesystems/devpts_pts.c b/tools/testing/selftests/filesystems/devpts_pts.c
index b9055e974289..b1fc9b916ace 100644
--- a/tools/testing/selftests/filesystems/devpts_pts.c
+++ b/tools/testing/selftests/filesystems/devpts_pts.c
@@ -8,9 +8,10 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
-#include <sys/ioctl.h>
+#include <asm/ioctls.h>
#include <sys/mount.h>
#include <sys/wait.h>
+#include "../kselftest.h"
static bool terminal_dup2(int duplicate, int original)
{
@@ -125,10 +126,12 @@ static int do_tiocgptpeer(char *ptmx, char *expected_procfd_contents)
if (errno == EINVAL) {
fprintf(stderr, "TIOCGPTPEER is not supported. "
"Skipping test.\n");
- fret = EXIT_SUCCESS;
+ fret = KSFT_SKIP;
+ } else {
+ fprintf(stderr,
+ "Failed to perform TIOCGPTPEER ioctl\n");
+ fret = EXIT_FAILURE;
}
-
- fprintf(stderr, "Failed to perform TIOCGPTPEER ioctl\n");
goto do_cleanup;
}
@@ -279,9 +282,9 @@ int main(int argc, char *argv[])
int ret;
if (!isatty(STDIN_FILENO)) {
- fprintf(stderr, "Standard input file desciptor is not attached "
+ fprintf(stderr, "Standard input file descriptor is not attached "
"to a terminal. Skipping test\n");
- exit(EXIT_FAILURE);
+ exit(KSFT_SKIP);
}
ret = unshare(CLONE_NEWNS);
diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
index 826f38d5dd19..261c81f08606 100644
--- a/tools/testing/selftests/firmware/Makefile
+++ b/tools/testing/selftests/firmware/Makefile
@@ -4,6 +4,7 @@
all:
TEST_PROGS := fw_run_tests.sh
+TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh
include ../lib.mk
diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh
index 8e2e34a2ca69..70d18be46af5 100755
--- a/tools/testing/selftests/firmware/fw_fallback.sh
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -74,7 +74,7 @@ load_fw_custom()
{
if [ ! -e "$DIR"/trigger_custom_fallback ]; then
echo "$0: custom fallback trigger not present, ignoring test" >&2
- return 1
+ exit $ksft_skip
fi
local name="$1"
@@ -107,7 +107,7 @@ load_fw_custom_cancel()
{
if [ ! -e "$DIR"/trigger_custom_fallback ]; then
echo "$0: canceling custom fallback trigger not present, ignoring test" >&2
- return 1
+ exit $ksft_skip
fi
local name="$1"
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index 6452d2129cd9..a4320c4b44dc 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -30,6 +30,7 @@ fi
if [ ! -e "$DIR"/trigger_async_request ]; then
echo "$0: empty filename: async trigger not present, ignoring test" >&2
+ exit $ksft_skip
else
if printf '\000' >"$DIR"/trigger_async_request 2> /dev/null; then
echo "$0: empty filename should not succeed (async)" >&2
@@ -69,6 +70,7 @@ fi
# Try the asynchronous version too
if [ ! -e "$DIR"/trigger_async_request ]; then
echo "$0: firmware loading: async trigger not present, ignoring test" >&2
+ exit $ksft_skip
else
if ! echo -n "$NAME" >"$DIR"/trigger_async_request ; then
echo "$0: could not trigger async request" >&2
@@ -89,7 +91,7 @@ test_config_present()
{
if [ ! -f $DIR/reset ]; then
echo "Configuration triggers not present, ignoring test"
- exit 0
+ exit $ksft_skip
fi
}
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
index 9ea31b57d71a..6c5f1b2ffb74 100755
--- a/tools/testing/selftests/firmware/fw_lib.sh
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -9,11 +9,14 @@ DIR=/sys/devices/virtual/misc/test_firmware
PROC_CONFIG="/proc/config.gz"
TEST_DIR=$(dirname $0)
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
print_reqs_exit()
{
echo "You must have the following enabled in your kernel:" >&2
cat $TEST_DIR/config >&2
- exit 1
+ exit $ksft_skip
}
test_modprobe()
@@ -88,7 +91,7 @@ verify_reqs()
if [ "$TEST_REQS_FW_SYSFS_FALLBACK" = "yes" ]; then
if [ ! "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
echo "usermode helper disabled so ignoring test"
- exit 0
+ exit $ksft_skip
fi
fi
}
@@ -154,11 +157,13 @@ test_finish()
if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
fi
- if [ "$OLD_FWPATH" = "" ]; then
- OLD_FWPATH=" "
- fi
if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then
- echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path
+ if [ "$OLD_FWPATH" = "" ]; then
+ # A zero-length write won't work; write a null byte
+ printf '\000' >/sys/module/firmware_class/parameters/path
+ else
+ echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path
+ fi
fi
if [ -f $FW ]; then
rm -f "$FW"
diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh
index 06d638e9dc62..cffdd4eb0a57 100755
--- a/tools/testing/selftests/firmware/fw_run_tests.sh
+++ b/tools/testing/selftests/firmware/fw_run_tests.sh
@@ -66,5 +66,5 @@ if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then
run_test_config_0003
else
echo "Running basic kernel configuration, working with your config"
- run_test
+ run_tests
fi
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index df3dd7fe5f9b..2a4f16fc9819 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -59,6 +59,13 @@ disable_events() {
echo 0 > events/enable
}
+clear_synthetic_events() { # reset all current synthetic events
+ grep -v ^# synthetic_events |
+ while read line; do
+ echo "!$line" >> synthetic_events
+ done
+}
+
initialize_ftrace() { # Reset ftrace to initial-state
# As the initial state, ftrace will be set to nop tracer,
# no events, no triggers, no filters, no function filters,
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
new file mode 100644
index 000000000000..2aabab363cfb
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# description: event trigger - test extended error support
+
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test extended error support"
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null
+if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then
+ fail "Failed to generate extended error in histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
new file mode 100644
index 000000000000..7fd5b4a8f060
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test field variable support
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test field variable support"
+
+echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events
+echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+ fail "Failed to create inter-event histogram"
+fi
+
+if ! grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+ fail "Failed to create histogram with field variable"
+fi
+
+echo '!hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+
+if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+ fail "Failed to remove histogram with field variable"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
new file mode 100644
index 000000000000..c93dbe38b5df
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -0,0 +1,58 @@
+#!/bin/sh
+# description: event trigger - test inter-event combined histogram trigger
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+clear_synthetic_events
+
+echo "Test create synthetic event"
+
+echo 'waking_latency u64 lat pid_t pid' > synthetic_events
+if [ ! -d events/synthetic/waking_latency ]; then
+ fail "Failed to create waking_latency synthetic event"
+fi
+
+echo "Test combined histogram"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=pid:waking_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).waking_latency($waking_lat,pid) if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' > events/synthetic/waking_latency/trigger
+
+echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events
+echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger
+
+echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
+ fail "Failed to create combined histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
new file mode 100644
index 000000000000..c193dce611a2
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
@@ -0,0 +1,44 @@
+#!/bin/sh
+# description: event trigger - test multiple actions on hist trigger
+
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test multiple actions on hist trigger"
+echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+TRIGGER1=events/sched/sched_wakeup/trigger
+TRIGGER2=events/sched/sched_switch/trigger
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' > $TRIGGER1
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid) if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid) if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> $TRIGGER2
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
new file mode 100644
index 000000000000..e84e7d048566
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch action
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+ fail "Failed to create onmatch action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
new file mode 100644
index 000000000000..7907d8aacde3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch-onmax action
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch-onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if [ ! grep -q "ping" events/synthetic/wakeup_latency/hist -o ! grep -q "max:" events/sched/sched_switch/hist]; then
+ fail "Failed to create onmatch-onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
new file mode 100644
index 000000000000..38b7ed6242b2
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
@@ -0,0 +1,48 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmax action
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+ping localhost -c 3
+if ! grep -q "max:" events/sched/sched_switch/hist; then
+ fail "Failed to create onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
new file mode 100644
index 000000000000..cef11377dcbd
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test synthetic event create remove
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+reset_trigger
+
+echo "Test create synthetic event with an error"
+echo 'wakeup_latency u64 lat pid_t pid char' > synthetic_events > /dev/null
+if [ -d events/synthetic/wakeup_latency ]; then
+ fail "Created wakeup_latency synthetic event with an invalid format"
+fi
+
+reset_trigger
+
+echo "Test remove synthetic event"
+echo '!wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to delete wakeup_latency synthetic event"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index a63e8453984d..12631f0076a1 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -17,10 +17,6 @@ all:
fi \
done
-override define RUN_TESTS
- @cd $(OUTPUT); ./run.sh
-endef
-
override define INSTALL_RULE
mkdir -p $(INSTALL_PATH)
install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
@@ -32,10 +28,6 @@ override define INSTALL_RULE
done;
endef
-override define EMIT_TESTS
- echo "./run.sh"
-endef
-
override define CLEAN
@for DIR in $(SUBDIRS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
diff --git a/tools/testing/selftests/gpio/gpio-mockup.sh b/tools/testing/selftests/gpio/gpio-mockup.sh
index 183fb932edbd..7f35b9880485 100755
--- a/tools/testing/selftests/gpio/gpio-mockup.sh
+++ b/tools/testing/selftests/gpio/gpio-mockup.sh
@@ -2,10 +2,11 @@
# SPDX-License-Identifier: GPL-2.0
#exit status
-#1: run as non-root user
+#1: Internal error
#2: sysfs/debugfs not mount
#3: insert module fail when gpio-mockup is a module.
-#4: other reason.
+#4: Skip test including run as non-root user.
+#5: other reason.
SYSFS=
GPIO_SYSFS=
@@ -15,6 +16,9 @@ GPIO_DEBUGFS=
dev_type=
module=
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
usage()
{
echo "Usage:"
@@ -34,7 +38,7 @@ prerequisite()
msg="skip all tests:"
if [ $UID != 0 ]; then
echo $msg must be run as root >&2
- exit 1
+ exit $ksft_skip
fi
SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
if [ ! -d "$SYSFS" ]; then
@@ -73,7 +77,7 @@ remove_module()
die()
{
remove_module
- exit 4
+ exit 5
}
test_chips()
diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
index 5a3f7d37e912..7340fd6a9a9f 100644
--- a/tools/testing/selftests/intel_pstate/Makefile
+++ b/tools/testing/selftests/intel_pstate/Makefile
@@ -2,7 +2,10 @@
CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
LDLIBS := $(LDLIBS) -lm
-ifeq (,$(filter $(ARCH),x86))
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq (x86,$(ARCH))
TEST_GEN_FILES := msr aperf
endif
diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c
index d21edea9c560..f6cd03a87493 100644
--- a/tools/testing/selftests/intel_pstate/aperf.c
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -9,6 +9,8 @@
#include <sys/timeb.h>
#include <sched.h>
#include <errno.h>
+#include <string.h>
+#include "../kselftest.h"
void usage(char *name) {
printf ("Usage: %s cpunum\n", name);
@@ -41,8 +43,8 @@ int main(int argc, char **argv) {
fd = open(msr_file_name, O_RDONLY);
if (fd == -1) {
- perror("Failed to open");
- return 1;
+ printf("/dev/cpu/%d/msr: %s\n", cpu, strerror(errno));
+ return KSFT_SKIP;
}
CPU_ZERO(&cpuset);
diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh
index c670359becc6..e7008f614ad7 100755
--- a/tools/testing/selftests/intel_pstate/run.sh
+++ b/tools/testing/selftests/intel_pstate/run.sh
@@ -30,9 +30,18 @@
EVALUATE_ONLY=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then
echo "$0 # Skipped: Test can only run on x86 architectures."
- exit 0
+ exit $ksft_skip
+fi
+
+msg="skip all tests:"
+if [ $UID != 0 ] && [ $EVALUATE_ONLY == 0 ]; then
+ echo $msg please run this as root >&2
+ exit $ksft_skip
fi
max_cpus=$(($(nproc)-1))
@@ -48,11 +57,12 @@ function run_test () {
echo "sleeping for 5 seconds"
sleep 5
- num_freqs=$(cat /proc/cpuinfo | grep MHz | sort -u | wc -l)
- if [ $num_freqs -le 2 ]; then
- cat /proc/cpuinfo | grep MHz | sort -u | tail -1 > /tmp/result.$1
+ grep MHz /proc/cpuinfo | sort -u > /tmp/result.freqs
+ num_freqs=$(wc -l /tmp/result.freqs | awk ' { print $1 } ')
+ if [ $num_freqs -ge 2 ]; then
+ tail -n 1 /tmp/result.freqs > /tmp/result.$1
else
- cat /proc/cpuinfo | grep MHz | sort -u > /tmp/result.$1
+ cp /tmp/result.freqs /tmp/result.$1
fi
./msr 0 >> /tmp/result.$1
@@ -82,32 +92,37 @@ _max_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $2 } ')
max_freq=$(($_max_freq / 1000))
-for freq in `seq $max_freq -100 $min_freq`
+[ $EVALUATE_ONLY -eq 0 ] && for freq in `seq $max_freq -100 $min_freq`
do
echo "Setting maximum frequency to $freq"
cpupower frequency-set -g powersave --max=${freq}MHz >& /dev/null
- [ $EVALUATE_ONLY -eq 0 ] && run_test $freq
+ run_test $freq
done
-echo "=============================================================================="
+[ $EVALUATE_ONLY -eq 0 ] && cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null
+echo "========================================================================"
echo "The marketing frequency of the cpu is $mkt_freq MHz"
echo "The maximum frequency of the cpu is $max_freq MHz"
echo "The minimum frequency of the cpu is $min_freq MHz"
-cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null
-
# make a pretty table
-echo "Target Actual Difference MSR(0x199) max_perf_pct"
+echo "Target Actual Difference MSR(0x199) max_perf_pct" | tr " " "\n" > /tmp/result.tab
for freq in `seq $max_freq -100 $min_freq`
do
result_freq=$(cat /tmp/result.${freq} | grep "cpu MHz" | awk ' { print $4 } ' | awk -F "." ' { print $1 } ')
msr=$(cat /tmp/result.${freq} | grep "msr" | awk ' { print $3 } ')
max_perf_pct=$(cat /tmp/result.${freq} | grep "max_perf_pct" | awk ' { print $2 } ' )
- if [ $result_freq -eq $freq ]; then
- echo " $freq $result_freq 0 $msr $(($max_perf_pct*3300))"
- else
- echo " $freq $result_freq $(($result_freq-$freq)) $msr $(($max_perf_pct*$max_freq))"
- fi
+ cat >> /tmp/result.tab << EOF
+$freq
+$result_freq
+$((result_freq - freq))
+$msr
+$((max_perf_pct * max_freq))
+EOF
done
+
+# print the table
+pr -aTt -5 < /tmp/result.tab
+
exit 0
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index ee9382bdfadc..dac927e82336 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -196,10 +196,9 @@ int main(int argc, char **argv)
int msg, pid, err;
struct msgque_data msgque;
- if (getuid() != 0) {
- printf("Please run the test as root - Exiting.\n");
- return ksft_exit_fail();
- }
+ if (getuid() != 0)
+ return ksft_exit_skip(
+ "Please run the test as root - Exiting.\n");
msgque.key = ftok(argv[0], 822155650);
if (msgque.key == -1) {
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index 7956ea3be667..0a76314b4414 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -62,13 +62,16 @@ ALL_TESTS="$ALL_TESTS 0007:5:1"
ALL_TESTS="$ALL_TESTS 0008:150:1"
ALL_TESTS="$ALL_TESTS 0009:150:1"
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
test_modprobe()
{
if [ ! -d $DIR ]; then
echo "$0: $DIR not present" >&2
echo "You must have the following enabled in your kernel:" >&2
cat $TEST_DIR/config >&2
- exit 1
+ exit $ksft_skip
fi
}
@@ -105,12 +108,12 @@ test_reqs()
{
if ! which modprobe 2> /dev/null > /dev/null; then
echo "$0: You need modprobe installed" >&2
- exit 1
+ exit $ksft_skip
fi
if ! which kmod 2> /dev/null > /dev/null; then
echo "$0: You need kmod installed" >&2
- exit 1
+ exit $ksft_skip
fi
# kmod 19 has a bad bug where it returns 0 when modprobe
@@ -124,13 +127,13 @@ test_reqs()
echo "$0: You need at least kmod 20" >&2
echo "kmod <= 19 is buggy, for details see:" >&2
echo "http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2
- exit 1
+ exit $ksft_skip
fi
uid=$(id -u)
if [ $uid -ne 0 ]; then
echo $msg must be run as root >&2
- exit 0
+ exit $ksft_skip
fi
}
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 1a52b03962a3..15e6b75fc3a5 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -20,7 +20,7 @@
#define KSFT_XFAIL 2
#define KSFT_XPASS 3
/* Treat skip as pass */
-#define KSFT_SKIP KSFT_PASS
+#define KSFT_SKIP 4
/* counters */
struct ksft_count {
@@ -57,7 +57,8 @@ static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; }
static inline void ksft_print_header(void)
{
- printf("TAP version 13\n");
+ if (!(getenv("KSFT_TAP_LEVEL")))
+ printf("TAP version 13\n");
}
static inline void ksft_print_cnts(void)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index e81bd28bdd89..6ae3730c4ee3 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -107,6 +107,27 @@
__FILE__, __LINE__, _metadata->name, ##__VA_ARGS__)
/**
+ * XFAIL(statement, fmt, ...)
+ *
+ * @statement: statement to run after reporting XFAIL
+ * @fmt: format string
+ * @...: optional arguments
+ *
+ * This forces a "pass" after reporting a failure with an XFAIL prefix,
+ * and runs "statement", which is usually "return" or "goto skip".
+ */
+#define XFAIL(statement, fmt, ...) do { \
+ if (TH_LOG_ENABLED) { \
+ fprintf(TH_LOG_STREAM, "[ XFAIL! ] " fmt "\n", \
+ ##__VA_ARGS__); \
+ } \
+ /* TODO: find a way to pass xfail to test runner process. */ \
+ _metadata->passed = 1; \
+ _metadata->trigger = 0; \
+ statement; \
+} while (0)
+
+/**
* TEST(test_name) - Defines the test function and creates the registration
* stub
*
@@ -198,7 +219,7 @@
/**
* FIXTURE_SETUP(fixture_name) - Prepares the setup function for the fixture.
- * *_metadata* is included so that ASSERT_* work as a convenience
+ * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
*
* @fixture_name: fixture name
*
@@ -221,6 +242,7 @@
FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
/**
* FIXTURE_TEARDOWN(fixture_name)
+ * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
*
* @fixture_name: fixture name
*
@@ -253,6 +275,8 @@
* Defines a test that depends on a fixture (e.g., is part of a test case).
* Very similar to TEST() except that *self* is the setup instance of fixture's
* datatype exposed for use by the implementation.
+ *
+ * Warning: use of ASSERT_* here will skip TEARDOWN.
*/
/* TODO(wad) register fixtures on dedicated test lists. */
#define TEST_F(fixture_name, test_name) \
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
new file mode 100644
index 000000000000..63fc1ab9248f
--- /dev/null
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -0,0 +1,3 @@
+set_sregs_test
+sync_regs_test
+vmx_tsc_adjust_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
new file mode 100644
index 000000000000..d9d00319b07c
--- /dev/null
+++ b/tools/testing/selftests/kvm/Makefile
@@ -0,0 +1,40 @@
+all:
+
+top_srcdir = ../../../../
+UNAME_M := $(shell uname -m)
+
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
+LIBKVM_x86_64 = lib/x86.c lib/vmx.c
+
+TEST_GEN_PROGS_x86_64 = set_sregs_test
+TEST_GEN_PROGS_x86_64 += sync_regs_test
+TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
+
+TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
+LIBKVM += $(LIBKVM_$(UNAME_M))
+
+INSTALL_HDR_PATH = $(top_srcdir)/usr
+LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
+CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
+
+# After inclusion, $(OUTPUT) is defined and
+# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
+include ../lib.mk
+
+STATIC_LIBS := $(OUTPUT)/libkvm.a
+LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
+EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS)
+
+x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
+$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
+ $(AR) crs $@ $^
+
+$(LINUX_HDR_PATH):
+ make -C $(top_srcdir) headers_install
+
+all: $(STATIC_LIBS) $(LINUX_HDR_PATH)
+$(TEST_GEN_PROGS): $(STATIC_LIBS)
+$(TEST_GEN_PROGS) $(LIBKVM_OBJ): | $(LINUX_HDR_PATH)
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
new file mode 100644
index 000000000000..637b7017b6ee
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -0,0 +1,145 @@
+/*
+ * tools/testing/selftests/kvm/include/kvm_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+#ifndef SELFTEST_KVM_UTIL_H
+#define SELFTEST_KVM_UTIL_H 1
+
+#include "test_util.h"
+
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+#include <sys/ioctl.h>
+
+#include "sparsebit.h"
+
+/*
+ * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be
+ * created. Only applies to VMs using EPT.
+ */
+#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul
+
+
+/* Callers of kvm_util only have an incomplete/opaque description of the
+ * structure kvm_util is using to maintain the state of a VM.
+ */
+struct kvm_vm;
+
+typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
+typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+
+/* Minimum allocated guest virtual and physical addresses */
+#define KVM_UTIL_MIN_VADDR 0x2000
+
+#define DEFAULT_GUEST_PHY_PAGES 512
+#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
+#define DEFAULT_STACK_PGS 5
+
+enum vm_guest_mode {
+ VM_MODE_FLAT48PG,
+};
+
+enum vm_mem_backing_src_type {
+ VM_MEM_SRC_ANONYMOUS,
+ VM_MEM_SRC_ANONYMOUS_THP,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB,
+};
+
+int kvm_check_cap(long cap);
+
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
+void kvm_vm_free(struct kvm_vm *vmp);
+
+int kvm_memcmp_hva_gva(void *hva,
+ struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
+
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
+ uint32_t data_memslot, uint32_t pgd_memslot);
+
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+void vcpu_dump(FILE *stream, struct kvm_vm *vm,
+ uint32_t vcpuid, uint8_t indent);
+
+void vm_create_irqchip(struct kvm_vm *vm);
+
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags);
+
+void vcpu_ioctl(struct kvm_vm *vm,
+ uint32_t vcpuid, unsigned long ioctl, void *arg);
+void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+ uint32_t data_memslot, uint32_t pgd_memslot);
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_mp_state *mp_state);
+void vcpu_regs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_regs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
+void vcpu_sregs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs);
+int _vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events);
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events);
+
+const char *exit_reason_str(unsigned int exit_reason);
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint32_t pgd_memslot);
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
+ vm_paddr_t paddr_min, uint32_t memslot);
+
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+void vcpu_set_cpuid(
+ struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid);
+
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
+
+static inline struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_entry(uint32_t function)
+{
+ return kvm_get_supported_cpuid_index(function, 0);
+}
+
+struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code);
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
+
+typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr,
+ vm_paddr_t vmxon_paddr,
+ vm_vaddr_t vmcs_vaddr,
+ vm_paddr_t vmcs_paddr);
+
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+ uint64_t end);
+
+struct kvm_dirty_log *
+allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
+
+int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
+
+#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
new file mode 100644
index 000000000000..54cfeb6568d3
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -0,0 +1,75 @@
+/*
+ * tools/testing/selftests/kvm/include/sparsebit.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ *
+ * Header file that describes API to the sparsebit library.
+ * This library provides a memory efficient means of storing
+ * the settings of bits indexed via a uint64_t. Memory usage
+ * is reasonable, significantly less than (2^64 / 8) bytes, as
+ * long as bits that are mostly set or mostly cleared are close
+ * to each other. This library is efficient in memory usage
+ * even in the case where most bits are set.
+ */
+
+#ifndef _TEST_SPARSEBIT_H_
+#define _TEST_SPARSEBIT_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct sparsebit;
+typedef uint64_t sparsebit_idx_t;
+typedef uint64_t sparsebit_num_t;
+
+struct sparsebit *sparsebit_alloc(void);
+void sparsebit_free(struct sparsebit **sbitp);
+void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src);
+
+bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_set_num(struct sparsebit *sbit,
+ sparsebit_idx_t idx, sparsebit_num_t num);
+bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_clear_num(struct sparsebit *sbit,
+ sparsebit_idx_t idx, sparsebit_num_t num);
+sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit);
+bool sparsebit_any_set(struct sparsebit *sbit);
+bool sparsebit_any_clear(struct sparsebit *sbit);
+bool sparsebit_all_set(struct sparsebit *sbit);
+bool sparsebit_all_clear(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit,
+ sparsebit_idx_t start, sparsebit_num_t num);
+sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit,
+ sparsebit_idx_t start, sparsebit_num_t num);
+
+void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx);
+void sparsebit_set_num(struct sparsebit *sbitp, sparsebit_idx_t start,
+ sparsebit_num_t num);
+void sparsebit_set_all(struct sparsebit *sbitp);
+
+void sparsebit_clear(struct sparsebit *sbitp, sparsebit_idx_t idx);
+void sparsebit_clear_num(struct sparsebit *sbitp,
+ sparsebit_idx_t start, sparsebit_num_t num);
+void sparsebit_clear_all(struct sparsebit *sbitp);
+
+void sparsebit_dump(FILE *stream, struct sparsebit *sbit,
+ unsigned int indent);
+void sparsebit_validate_internal(struct sparsebit *sbit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TEST_SPARSEBIT_H_ */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
new file mode 100644
index 000000000000..ac53730b30aa
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -0,0 +1,46 @@
+/*
+ * tools/testing/selftests/kvm/include/test_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef TEST_UTIL_H
+#define TEST_UTIL_H 1
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include "kselftest.h"
+
+ssize_t test_write(int fd, const void *buf, size_t count);
+ssize_t test_read(int fd, void *buf, size_t count);
+int test_seq_read(const char *path, char **bufp, size_t *sizep);
+
+void test_assert(bool exp, const char *exp_str,
+ const char *file, unsigned int line, const char *fmt, ...);
+
+#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
+
+#define TEST_ASSERT(e, fmt, ...) \
+ test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+#define ASSERT_EQ(a, b) do { \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ TEST_ASSERT(__a == __b, \
+ "ASSERT_EQ(%s, %s) failed.\n" \
+ "\t%s is %#lx\n" \
+ "\t%s is %#lx", \
+ #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
+} while (0)
+
+#endif /* TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/vmx.h
new file mode 100644
index 000000000000..6ed8499807fd
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/vmx.h
@@ -0,0 +1,494 @@
+/*
+ * tools/testing/selftests/kvm/include/vmx.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef SELFTEST_KVM_VMX_H
+#define SELFTEST_KVM_VMX_H
+
+#include <stdint.h>
+#include "x86.h"
+
+#define CPUID_VMX_BIT 5
+
+#define CPUID_VMX (1 << 5)
+
+/*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
+#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
+#define CPU_BASED_HLT_EXITING 0x00000080
+#define CPU_BASED_INVLPG_EXITING 0x00000200
+#define CPU_BASED_MWAIT_EXITING 0x00000400
+#define CPU_BASED_RDPMC_EXITING 0x00000800
+#define CPU_BASED_RDTSC_EXITING 0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
+#define CPU_BASED_CR3_STORE_EXITING 0x00010000
+#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
+#define CPU_BASED_CR8_STORE_EXITING 0x00100000
+#define CPU_BASED_TPR_SHADOW 0x00200000
+#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000
+#define CPU_BASED_MOV_DR_EXITING 0x00800000
+#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
+#define CPU_BASED_USE_IO_BITMAPS 0x02000000
+#define CPU_BASED_MONITOR_TRAP 0x08000000
+#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
+#define CPU_BASED_MONITOR_EXITING 0x20000000
+#define CPU_BASED_PAUSE_EXITING 0x40000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
+
+#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172
+
+/*
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
+ */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
+#define SECONDARY_EXEC_DESC 0x00000004
+#define SECONDARY_EXEC_RDTSCP 0x00000008
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
+#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
+#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
+#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800
+#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
+#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
+#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
+#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000
+#define SECONDARY_EXEC_ENABLE_PML 0x00020000
+#define SECONDARY_EPT_VE 0x00040000
+#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000
+#define SECONDARY_EXEC_TSC_SCALING 0x02000000
+
+#define PIN_BASED_EXT_INTR_MASK 0x00000001
+#define PIN_BASED_NMI_EXITING 0x00000008
+#define PIN_BASED_VIRTUAL_NMIS 0x00000020
+#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
+#define PIN_BASED_POSTED_INTR 0x00000080
+
+#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
+
+#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004
+#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
+#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000
+#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
+#define VM_EXIT_SAVE_IA32_PAT 0x00040000
+#define VM_EXIT_LOAD_IA32_PAT 0x00080000
+#define VM_EXIT_SAVE_IA32_EFER 0x00100000
+#define VM_EXIT_LOAD_IA32_EFER 0x00200000
+#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
+
+#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
+
+#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004
+#define VM_ENTRY_IA32E_MODE 0x00000200
+#define VM_ENTRY_SMM 0x00000400
+#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
+#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000
+#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
+#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
+
+#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
+
+#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
+#define VMX_MISC_SAVE_EFER_LMA 0x00000020
+
+#define EXIT_REASON_FAILED_VMENTRY 0x80000000
+#define EXIT_REASON_EXCEPTION_NMI 0
+#define EXIT_REASON_EXTERNAL_INTERRUPT 1
+#define EXIT_REASON_TRIPLE_FAULT 2
+#define EXIT_REASON_PENDING_INTERRUPT 7
+#define EXIT_REASON_NMI_WINDOW 8
+#define EXIT_REASON_TASK_SWITCH 9
+#define EXIT_REASON_CPUID 10
+#define EXIT_REASON_HLT 12
+#define EXIT_REASON_INVD 13
+#define EXIT_REASON_INVLPG 14
+#define EXIT_REASON_RDPMC 15
+#define EXIT_REASON_RDTSC 16
+#define EXIT_REASON_VMCALL 18
+#define EXIT_REASON_VMCLEAR 19
+#define EXIT_REASON_VMLAUNCH 20
+#define EXIT_REASON_VMPTRLD 21
+#define EXIT_REASON_VMPTRST 22
+#define EXIT_REASON_VMREAD 23
+#define EXIT_REASON_VMRESUME 24
+#define EXIT_REASON_VMWRITE 25
+#define EXIT_REASON_VMOFF 26
+#define EXIT_REASON_VMON 27
+#define EXIT_REASON_CR_ACCESS 28
+#define EXIT_REASON_DR_ACCESS 29
+#define EXIT_REASON_IO_INSTRUCTION 30
+#define EXIT_REASON_MSR_READ 31
+#define EXIT_REASON_MSR_WRITE 32
+#define EXIT_REASON_INVALID_STATE 33
+#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION 40
+#define EXIT_REASON_MCE_DURING_VMENTRY 41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_EOI_INDUCED 45
+#define EXIT_REASON_EPT_VIOLATION 48
+#define EXIT_REASON_EPT_MISCONFIG 49
+#define EXIT_REASON_INVEPT 50
+#define EXIT_REASON_RDTSCP 51
+#define EXIT_REASON_PREEMPTION_TIMER 52
+#define EXIT_REASON_INVVPID 53
+#define EXIT_REASON_WBINVD 54
+#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_APIC_WRITE 56
+#define EXIT_REASON_INVPCID 58
+#define EXIT_REASON_PML_FULL 62
+#define EXIT_REASON_XSAVES 63
+#define EXIT_REASON_XRSTORS 64
+#define LAST_EXIT_REASON 64
+
+enum vmcs_field {
+ VIRTUAL_PROCESSOR_ID = 0x00000000,
+ POSTED_INTR_NV = 0x00000002,
+ GUEST_ES_SELECTOR = 0x00000800,
+ GUEST_CS_SELECTOR = 0x00000802,
+ GUEST_SS_SELECTOR = 0x00000804,
+ GUEST_DS_SELECTOR = 0x00000806,
+ GUEST_FS_SELECTOR = 0x00000808,
+ GUEST_GS_SELECTOR = 0x0000080a,
+ GUEST_LDTR_SELECTOR = 0x0000080c,
+ GUEST_TR_SELECTOR = 0x0000080e,
+ GUEST_INTR_STATUS = 0x00000810,
+ GUEST_PML_INDEX = 0x00000812,
+ HOST_ES_SELECTOR = 0x00000c00,
+ HOST_CS_SELECTOR = 0x00000c02,
+ HOST_SS_SELECTOR = 0x00000c04,
+ HOST_DS_SELECTOR = 0x00000c06,
+ HOST_FS_SELECTOR = 0x00000c08,
+ HOST_GS_SELECTOR = 0x00000c0a,
+ HOST_TR_SELECTOR = 0x00000c0c,
+ IO_BITMAP_A = 0x00002000,
+ IO_BITMAP_A_HIGH = 0x00002001,
+ IO_BITMAP_B = 0x00002002,
+ IO_BITMAP_B_HIGH = 0x00002003,
+ MSR_BITMAP = 0x00002004,
+ MSR_BITMAP_HIGH = 0x00002005,
+ VM_EXIT_MSR_STORE_ADDR = 0x00002006,
+ VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007,
+ VM_EXIT_MSR_LOAD_ADDR = 0x00002008,
+ VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
+ VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
+ VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
+ PML_ADDRESS = 0x0000200e,
+ PML_ADDRESS_HIGH = 0x0000200f,
+ TSC_OFFSET = 0x00002010,
+ TSC_OFFSET_HIGH = 0x00002011,
+ VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
+ VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
+ APIC_ACCESS_ADDR = 0x00002014,
+ APIC_ACCESS_ADDR_HIGH = 0x00002015,
+ POSTED_INTR_DESC_ADDR = 0x00002016,
+ POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
+ EPT_POINTER = 0x0000201a,
+ EPT_POINTER_HIGH = 0x0000201b,
+ EOI_EXIT_BITMAP0 = 0x0000201c,
+ EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
+ EOI_EXIT_BITMAP1 = 0x0000201e,
+ EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
+ EOI_EXIT_BITMAP2 = 0x00002020,
+ EOI_EXIT_BITMAP2_HIGH = 0x00002021,
+ EOI_EXIT_BITMAP3 = 0x00002022,
+ EOI_EXIT_BITMAP3_HIGH = 0x00002023,
+ VMREAD_BITMAP = 0x00002026,
+ VMREAD_BITMAP_HIGH = 0x00002027,
+ VMWRITE_BITMAP = 0x00002028,
+ VMWRITE_BITMAP_HIGH = 0x00002029,
+ XSS_EXIT_BITMAP = 0x0000202C,
+ XSS_EXIT_BITMAP_HIGH = 0x0000202D,
+ TSC_MULTIPLIER = 0x00002032,
+ TSC_MULTIPLIER_HIGH = 0x00002033,
+ GUEST_PHYSICAL_ADDRESS = 0x00002400,
+ GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
+ VMCS_LINK_POINTER = 0x00002800,
+ VMCS_LINK_POINTER_HIGH = 0x00002801,
+ GUEST_IA32_DEBUGCTL = 0x00002802,
+ GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
+ GUEST_IA32_PAT = 0x00002804,
+ GUEST_IA32_PAT_HIGH = 0x00002805,
+ GUEST_IA32_EFER = 0x00002806,
+ GUEST_IA32_EFER_HIGH = 0x00002807,
+ GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
+ GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
+ GUEST_PDPTR0 = 0x0000280a,
+ GUEST_PDPTR0_HIGH = 0x0000280b,
+ GUEST_PDPTR1 = 0x0000280c,
+ GUEST_PDPTR1_HIGH = 0x0000280d,
+ GUEST_PDPTR2 = 0x0000280e,
+ GUEST_PDPTR2_HIGH = 0x0000280f,
+ GUEST_PDPTR3 = 0x00002810,
+ GUEST_PDPTR3_HIGH = 0x00002811,
+ GUEST_BNDCFGS = 0x00002812,
+ GUEST_BNDCFGS_HIGH = 0x00002813,
+ HOST_IA32_PAT = 0x00002c00,
+ HOST_IA32_PAT_HIGH = 0x00002c01,
+ HOST_IA32_EFER = 0x00002c02,
+ HOST_IA32_EFER_HIGH = 0x00002c03,
+ HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
+ HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
+ PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
+ CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
+ EXCEPTION_BITMAP = 0x00004004,
+ PAGE_FAULT_ERROR_CODE_MASK = 0x00004006,
+ PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008,
+ CR3_TARGET_COUNT = 0x0000400a,
+ VM_EXIT_CONTROLS = 0x0000400c,
+ VM_EXIT_MSR_STORE_COUNT = 0x0000400e,
+ VM_EXIT_MSR_LOAD_COUNT = 0x00004010,
+ VM_ENTRY_CONTROLS = 0x00004012,
+ VM_ENTRY_MSR_LOAD_COUNT = 0x00004014,
+ VM_ENTRY_INTR_INFO_FIELD = 0x00004016,
+ VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018,
+ VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
+ TPR_THRESHOLD = 0x0000401c,
+ SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
+ PLE_GAP = 0x00004020,
+ PLE_WINDOW = 0x00004022,
+ VM_INSTRUCTION_ERROR = 0x00004400,
+ VM_EXIT_REASON = 0x00004402,
+ VM_EXIT_INTR_INFO = 0x00004404,
+ VM_EXIT_INTR_ERROR_CODE = 0x00004406,
+ IDT_VECTORING_INFO_FIELD = 0x00004408,
+ IDT_VECTORING_ERROR_CODE = 0x0000440a,
+ VM_EXIT_INSTRUCTION_LEN = 0x0000440c,
+ VMX_INSTRUCTION_INFO = 0x0000440e,
+ GUEST_ES_LIMIT = 0x00004800,
+ GUEST_CS_LIMIT = 0x00004802,
+ GUEST_SS_LIMIT = 0x00004804,
+ GUEST_DS_LIMIT = 0x00004806,
+ GUEST_FS_LIMIT = 0x00004808,
+ GUEST_GS_LIMIT = 0x0000480a,
+ GUEST_LDTR_LIMIT = 0x0000480c,
+ GUEST_TR_LIMIT = 0x0000480e,
+ GUEST_GDTR_LIMIT = 0x00004810,
+ GUEST_IDTR_LIMIT = 0x00004812,
+ GUEST_ES_AR_BYTES = 0x00004814,
+ GUEST_CS_AR_BYTES = 0x00004816,
+ GUEST_SS_AR_BYTES = 0x00004818,
+ GUEST_DS_AR_BYTES = 0x0000481a,
+ GUEST_FS_AR_BYTES = 0x0000481c,
+ GUEST_GS_AR_BYTES = 0x0000481e,
+ GUEST_LDTR_AR_BYTES = 0x00004820,
+ GUEST_TR_AR_BYTES = 0x00004822,
+ GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
+ GUEST_ACTIVITY_STATE = 0X00004826,
+ GUEST_SYSENTER_CS = 0x0000482A,
+ VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
+ HOST_IA32_SYSENTER_CS = 0x00004c00,
+ CR0_GUEST_HOST_MASK = 0x00006000,
+ CR4_GUEST_HOST_MASK = 0x00006002,
+ CR0_READ_SHADOW = 0x00006004,
+ CR4_READ_SHADOW = 0x00006006,
+ CR3_TARGET_VALUE0 = 0x00006008,
+ CR3_TARGET_VALUE1 = 0x0000600a,
+ CR3_TARGET_VALUE2 = 0x0000600c,
+ CR3_TARGET_VALUE3 = 0x0000600e,
+ EXIT_QUALIFICATION = 0x00006400,
+ GUEST_LINEAR_ADDRESS = 0x0000640a,
+ GUEST_CR0 = 0x00006800,
+ GUEST_CR3 = 0x00006802,
+ GUEST_CR4 = 0x00006804,
+ GUEST_ES_BASE = 0x00006806,
+ GUEST_CS_BASE = 0x00006808,
+ GUEST_SS_BASE = 0x0000680a,
+ GUEST_DS_BASE = 0x0000680c,
+ GUEST_FS_BASE = 0x0000680e,
+ GUEST_GS_BASE = 0x00006810,
+ GUEST_LDTR_BASE = 0x00006812,
+ GUEST_TR_BASE = 0x00006814,
+ GUEST_GDTR_BASE = 0x00006816,
+ GUEST_IDTR_BASE = 0x00006818,
+ GUEST_DR7 = 0x0000681a,
+ GUEST_RSP = 0x0000681c,
+ GUEST_RIP = 0x0000681e,
+ GUEST_RFLAGS = 0x00006820,
+ GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822,
+ GUEST_SYSENTER_ESP = 0x00006824,
+ GUEST_SYSENTER_EIP = 0x00006826,
+ HOST_CR0 = 0x00006c00,
+ HOST_CR3 = 0x00006c02,
+ HOST_CR4 = 0x00006c04,
+ HOST_FS_BASE = 0x00006c06,
+ HOST_GS_BASE = 0x00006c08,
+ HOST_TR_BASE = 0x00006c0a,
+ HOST_GDTR_BASE = 0x00006c0c,
+ HOST_IDTR_BASE = 0x00006c0e,
+ HOST_IA32_SYSENTER_ESP = 0x00006c10,
+ HOST_IA32_SYSENTER_EIP = 0x00006c12,
+ HOST_RSP = 0x00006c14,
+ HOST_RIP = 0x00006c16,
+};
+
+struct vmx_msr_entry {
+ uint32_t index;
+ uint32_t reserved;
+ uint64_t value;
+} __attribute__ ((aligned(16)));
+
+static inline int vmxon(uint64_t phys)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(phys)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline void vmxoff(void)
+{
+ __asm__ __volatile__("vmxoff");
+}
+
+static inline int vmclear(uint64_t vmcs_pa)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(vmcs_pa)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline int vmptrld(uint64_t vmcs_pa)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(vmcs_pa)
+ : "cc", "memory");
+
+ return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmlaunch.
+ */
+static inline int vmlaunch(void)
+{
+ int ret;
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "vmwrite %%rsp, %[host_rsp];"
+ "lea 1f(%%rip), %%rax;"
+ "vmwrite %%rax, %[host_rip];"
+ "vmlaunch;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"((uint64_t)HOST_RSP),
+ [host_rip]"r"((uint64_t)HOST_RIP)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int vmresume(void)
+{
+ int ret;
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "vmwrite %%rsp, %[host_rsp];"
+ "lea 1f(%%rip), %%rax;"
+ "vmwrite %%rax, %[host_rip];"
+ "vmresume;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"((uint64_t)HOST_RSP),
+ [host_rip]"r"((uint64_t)HOST_RIP)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+static inline int vmread(uint64_t encoding, uint64_t *value)
+{
+ uint64_t tmp;
+ uint8_t ret;
+
+ __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
+ : [value]"=rm"(tmp), [ret]"=rm"(ret)
+ : [encoding]"r"(encoding)
+ : "cc", "memory");
+
+ *value = tmp;
+ return ret;
+}
+
+/*
+ * A wrapper around vmread that ignores errors and returns zero if the
+ * vmread instruction fails.
+ */
+static inline uint64_t vmreadz(uint64_t encoding)
+{
+ uint64_t value = 0;
+ vmread(encoding, &value);
+ return value;
+}
+
+static inline int vmwrite(uint64_t encoding, uint64_t value)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [value]"rm"(value), [encoding]"r"(encoding)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline uint32_t vmcs_revision(void)
+{
+ return rdmsr(MSR_IA32_VMX_BASIC);
+}
+
+void prepare_for_vmx_operation(void);
+void prepare_vmcs(void *guest_rip, void *guest_rsp);
+struct kvm_vm *vm_create_default_vmx(uint32_t vcpuid,
+ vmx_guest_code_t guest_code);
+
+#endif /* !SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86.h
new file mode 100644
index 000000000000..4a5b2c4c1a0f
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86.h
@@ -0,0 +1,1043 @@
+/*
+ * tools/testing/selftests/kvm/include/x86.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef SELFTEST_KVM_X86_H
+#define SELFTEST_KVM_X86_H
+
+#include <assert.h>
+#include <stdint.h>
+
+#define X86_EFLAGS_FIXED (1u << 1)
+
+#define X86_CR4_VME (1ul << 0)
+#define X86_CR4_PVI (1ul << 1)
+#define X86_CR4_TSD (1ul << 2)
+#define X86_CR4_DE (1ul << 3)
+#define X86_CR4_PSE (1ul << 4)
+#define X86_CR4_PAE (1ul << 5)
+#define X86_CR4_MCE (1ul << 6)
+#define X86_CR4_PGE (1ul << 7)
+#define X86_CR4_PCE (1ul << 8)
+#define X86_CR4_OSFXSR (1ul << 9)
+#define X86_CR4_OSXMMEXCPT (1ul << 10)
+#define X86_CR4_UMIP (1ul << 11)
+#define X86_CR4_VMXE (1ul << 13)
+#define X86_CR4_SMXE (1ul << 14)
+#define X86_CR4_FSGSBASE (1ul << 16)
+#define X86_CR4_PCIDE (1ul << 17)
+#define X86_CR4_OSXSAVE (1ul << 18)
+#define X86_CR4_SMEP (1ul << 20)
+#define X86_CR4_SMAP (1ul << 21)
+#define X86_CR4_PKE (1ul << 22)
+
+/* The enum values match the intruction encoding of each register */
+enum x86_register {
+ RAX = 0,
+ RCX,
+ RDX,
+ RBX,
+ RSP,
+ RBP,
+ RSI,
+ RDI,
+ R8,
+ R9,
+ R10,
+ R11,
+ R12,
+ R13,
+ R14,
+ R15,
+};
+
+struct desc64 {
+ uint16_t limit0;
+ uint16_t base0;
+ unsigned base1:8, type:5, dpl:2, p:1;
+ unsigned limit1:4, zero0:3, g:1, base2:8;
+ uint32_t base3;
+ uint32_t zero1;
+} __attribute__((packed));
+
+struct desc_ptr {
+ uint16_t size;
+ uint64_t address;
+} __attribute__((packed));
+
+static inline uint64_t get_desc64_base(const struct desc64 *desc)
+{
+ return ((uint64_t)desc->base3 << 32) |
+ (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+}
+
+static inline uint64_t rdtsc(void)
+{
+ uint32_t eax, edx;
+
+ /*
+ * The lfence is to wait (on Intel CPUs) until all previous
+ * instructions have been executed.
+ */
+ __asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
+ return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdtscp(uint32_t *aux)
+{
+ uint32_t eax, edx;
+
+ __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
+ return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdmsr(uint32_t msr)
+{
+ uint32_t a, d;
+
+ __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+ return a | ((uint64_t) d << 32);
+}
+
+static inline void wrmsr(uint32_t msr, uint64_t value)
+{
+ uint32_t a = value;
+ uint32_t d = value >> 32;
+
+ __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+
+static inline uint16_t inw(uint16_t port)
+{
+ uint16_t tmp;
+
+ __asm__ __volatile__("in %%dx, %%ax"
+ : /* output */ "=a" (tmp)
+ : /* input */ "d" (port));
+
+ return tmp;
+}
+
+static inline uint16_t get_es(void)
+{
+ uint16_t es;
+
+ __asm__ __volatile__("mov %%es, %[es]"
+ : /* output */ [es]"=rm"(es));
+ return es;
+}
+
+static inline uint16_t get_cs(void)
+{
+ uint16_t cs;
+
+ __asm__ __volatile__("mov %%cs, %[cs]"
+ : /* output */ [cs]"=rm"(cs));
+ return cs;
+}
+
+static inline uint16_t get_ss(void)
+{
+ uint16_t ss;
+
+ __asm__ __volatile__("mov %%ss, %[ss]"
+ : /* output */ [ss]"=rm"(ss));
+ return ss;
+}
+
+static inline uint16_t get_ds(void)
+{
+ uint16_t ds;
+
+ __asm__ __volatile__("mov %%ds, %[ds]"
+ : /* output */ [ds]"=rm"(ds));
+ return ds;
+}
+
+static inline uint16_t get_fs(void)
+{
+ uint16_t fs;
+
+ __asm__ __volatile__("mov %%fs, %[fs]"
+ : /* output */ [fs]"=rm"(fs));
+ return fs;
+}
+
+static inline uint16_t get_gs(void)
+{
+ uint16_t gs;
+
+ __asm__ __volatile__("mov %%gs, %[gs]"
+ : /* output */ [gs]"=rm"(gs));
+ return gs;
+}
+
+static inline uint16_t get_tr(void)
+{
+ uint16_t tr;
+
+ __asm__ __volatile__("str %[tr]"
+ : /* output */ [tr]"=rm"(tr));
+ return tr;
+}
+
+static inline uint64_t get_cr0(void)
+{
+ uint64_t cr0;
+
+ __asm__ __volatile__("mov %%cr0, %[cr0]"
+ : /* output */ [cr0]"=r"(cr0));
+ return cr0;
+}
+
+static inline uint64_t get_cr3(void)
+{
+ uint64_t cr3;
+
+ __asm__ __volatile__("mov %%cr3, %[cr3]"
+ : /* output */ [cr3]"=r"(cr3));
+ return cr3;
+}
+
+static inline uint64_t get_cr4(void)
+{
+ uint64_t cr4;
+
+ __asm__ __volatile__("mov %%cr4, %[cr4]"
+ : /* output */ [cr4]"=r"(cr4));
+ return cr4;
+}
+
+static inline void set_cr4(uint64_t val)
+{
+ __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
+}
+
+static inline uint64_t get_gdt_base(void)
+{
+ struct desc_ptr gdt;
+ __asm__ __volatile__("sgdt %[gdt]"
+ : /* output */ [gdt]"=m"(gdt));
+ return gdt.address;
+}
+
+static inline uint64_t get_idt_base(void)
+{
+ struct desc_ptr idt;
+ __asm__ __volatile__("sidt %[idt]"
+ : /* output */ [idt]"=m"(idt));
+ return idt.address;
+}
+
+#define SET_XMM(__var, __xmm) \
+ asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
+
+static inline void set_xmm(int n, unsigned long val)
+{
+ switch (n) {
+ case 0:
+ SET_XMM(val, xmm0);
+ break;
+ case 1:
+ SET_XMM(val, xmm1);
+ break;
+ case 2:
+ SET_XMM(val, xmm2);
+ break;
+ case 3:
+ SET_XMM(val, xmm3);
+ break;
+ case 4:
+ SET_XMM(val, xmm4);
+ break;
+ case 5:
+ SET_XMM(val, xmm5);
+ break;
+ case 6:
+ SET_XMM(val, xmm6);
+ break;
+ case 7:
+ SET_XMM(val, xmm7);
+ break;
+ }
+}
+
+typedef unsigned long v1di __attribute__ ((vector_size (8)));
+static inline unsigned long get_xmm(int n)
+{
+ assert(n >= 0 && n <= 7);
+
+ register v1di xmm0 __asm__("%xmm0");
+ register v1di xmm1 __asm__("%xmm1");
+ register v1di xmm2 __asm__("%xmm2");
+ register v1di xmm3 __asm__("%xmm3");
+ register v1di xmm4 __asm__("%xmm4");
+ register v1di xmm5 __asm__("%xmm5");
+ register v1di xmm6 __asm__("%xmm6");
+ register v1di xmm7 __asm__("%xmm7");
+ switch (n) {
+ case 0:
+ return (unsigned long)xmm0;
+ case 1:
+ return (unsigned long)xmm1;
+ case 2:
+ return (unsigned long)xmm2;
+ case 3:
+ return (unsigned long)xmm3;
+ case 4:
+ return (unsigned long)xmm4;
+ case 5:
+ return (unsigned long)xmm5;
+ case 6:
+ return (unsigned long)xmm6;
+ case 7:
+ return (unsigned long)xmm7;
+ }
+ return 0;
+}
+
+/*
+ * Basic CPU control in CR0
+ */
+#define X86_CR0_PE (1UL<<0) /* Protection Enable */
+#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */
+#define X86_CR0_EM (1UL<<2) /* Emulation */
+#define X86_CR0_TS (1UL<<3) /* Task Switched */
+#define X86_CR0_ET (1UL<<4) /* Extension Type */
+#define X86_CR0_NE (1UL<<5) /* Numeric Error */
+#define X86_CR0_WP (1UL<<16) /* Write Protect */
+#define X86_CR0_AM (1UL<<18) /* Alignment Mask */
+#define X86_CR0_NW (1UL<<29) /* Not Write-through */
+#define X86_CR0_CD (1UL<<30) /* Cache Disable */
+#define X86_CR0_PG (1UL<<31) /* Paging */
+
+/*
+ * CPU model specific register (MSR) numbers.
+ */
+
+/* x86-64 specific MSRs */
+#define MSR_EFER 0xc0000080 /* extended feature register */
+#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
+#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
+#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */
+#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
+#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
+#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
+#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */
+#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */
+
+/* EFER bits: */
+#define EFER_SCE (1<<0) /* SYSCALL/SYSRET */
+#define EFER_LME (1<<8) /* Long mode enable */
+#define EFER_LMA (1<<10) /* Long mode active (read-only) */
+#define EFER_NX (1<<11) /* No execute enable */
+#define EFER_SVME (1<<12) /* Enable virtualization */
+#define EFER_LMSLE (1<<13) /* Long Mode Segment Limit Enable */
+#define EFER_FFXSR (1<<14) /* Enable Fast FXSAVE/FXRSTOR */
+
+/* Intel MSRs. Some also available on other CPUs */
+
+#define MSR_PPIN_CTL 0x0000004e
+#define MSR_PPIN 0x0000004f
+
+#define MSR_IA32_PERFCTR0 0x000000c1
+#define MSR_IA32_PERFCTR1 0x000000c2
+#define MSR_FSB_FREQ 0x000000cd
+#define MSR_PLATFORM_INFO 0x000000ce
+#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
+#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
+
+#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
+#define NHM_C3_AUTO_DEMOTE (1UL << 25)
+#define NHM_C1_AUTO_DEMOTE (1UL << 26)
+#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
+#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
+#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
+
+#define MSR_MTRRcap 0x000000fe
+#define MSR_IA32_BBL_CR_CTL 0x00000119
+#define MSR_IA32_BBL_CR_CTL3 0x0000011e
+
+#define MSR_IA32_SYSENTER_CS 0x00000174
+#define MSR_IA32_SYSENTER_ESP 0x00000175
+#define MSR_IA32_SYSENTER_EIP 0x00000176
+
+#define MSR_IA32_MCG_CAP 0x00000179
+#define MSR_IA32_MCG_STATUS 0x0000017a
+#define MSR_IA32_MCG_CTL 0x0000017b
+#define MSR_IA32_MCG_EXT_CTL 0x000004d0
+
+#define MSR_OFFCORE_RSP_0 0x000001a6
+#define MSR_OFFCORE_RSP_1 0x000001a7
+#define MSR_TURBO_RATIO_LIMIT 0x000001ad
+#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
+#define MSR_TURBO_RATIO_LIMIT2 0x000001af
+
+#define MSR_LBR_SELECT 0x000001c8
+#define MSR_LBR_TOS 0x000001c9
+#define MSR_LBR_NHM_FROM 0x00000680
+#define MSR_LBR_NHM_TO 0x000006c0
+#define MSR_LBR_CORE_FROM 0x00000040
+#define MSR_LBR_CORE_TO 0x00000060
+
+#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */
+#define LBR_INFO_MISPRED BIT_ULL(63)
+#define LBR_INFO_IN_TX BIT_ULL(62)
+#define LBR_INFO_ABORT BIT_ULL(61)
+#define LBR_INFO_CYCLES 0xffff
+
+#define MSR_IA32_PEBS_ENABLE 0x000003f1
+#define MSR_IA32_DS_AREA 0x00000600
+#define MSR_IA32_PERF_CAPABILITIES 0x00000345
+#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
+
+#define MSR_IA32_RTIT_CTL 0x00000570
+#define MSR_IA32_RTIT_STATUS 0x00000571
+#define MSR_IA32_RTIT_ADDR0_A 0x00000580
+#define MSR_IA32_RTIT_ADDR0_B 0x00000581
+#define MSR_IA32_RTIT_ADDR1_A 0x00000582
+#define MSR_IA32_RTIT_ADDR1_B 0x00000583
+#define MSR_IA32_RTIT_ADDR2_A 0x00000584
+#define MSR_IA32_RTIT_ADDR2_B 0x00000585
+#define MSR_IA32_RTIT_ADDR3_A 0x00000586
+#define MSR_IA32_RTIT_ADDR3_B 0x00000587
+#define MSR_IA32_RTIT_CR3_MATCH 0x00000572
+#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560
+#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561
+
+#define MSR_MTRRfix64K_00000 0x00000250
+#define MSR_MTRRfix16K_80000 0x00000258
+#define MSR_MTRRfix16K_A0000 0x00000259
+#define MSR_MTRRfix4K_C0000 0x00000268
+#define MSR_MTRRfix4K_C8000 0x00000269
+#define MSR_MTRRfix4K_D0000 0x0000026a
+#define MSR_MTRRfix4K_D8000 0x0000026b
+#define MSR_MTRRfix4K_E0000 0x0000026c
+#define MSR_MTRRfix4K_E8000 0x0000026d
+#define MSR_MTRRfix4K_F0000 0x0000026e
+#define MSR_MTRRfix4K_F8000 0x0000026f
+#define MSR_MTRRdefType 0x000002ff
+
+#define MSR_IA32_CR_PAT 0x00000277
+
+#define MSR_IA32_DEBUGCTLMSR 0x000001d9
+#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
+#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
+#define MSR_IA32_LASTINTFROMIP 0x000001dd
+#define MSR_IA32_LASTINTTOIP 0x000001de
+
+/* DEBUGCTLMSR bits (others vary by model): */
+#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT 1
+#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
+#define DEBUGCTLMSR_TR (1UL << 6)
+#define DEBUGCTLMSR_BTS (1UL << 7)
+#define DEBUGCTLMSR_BTINT (1UL << 8)
+#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
+#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
+#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
+#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
+#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
+
+#define MSR_PEBS_FRONTEND 0x000003f7
+
+#define MSR_IA32_POWER_CTL 0x000001fc
+
+#define MSR_IA32_MC0_CTL 0x00000400
+#define MSR_IA32_MC0_STATUS 0x00000401
+#define MSR_IA32_MC0_ADDR 0x00000402
+#define MSR_IA32_MC0_MISC 0x00000403
+
+/* C-state Residency Counters */
+#define MSR_PKG_C3_RESIDENCY 0x000003f8
+#define MSR_PKG_C6_RESIDENCY 0x000003f9
+#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa
+#define MSR_PKG_C7_RESIDENCY 0x000003fa
+#define MSR_CORE_C3_RESIDENCY 0x000003fc
+#define MSR_CORE_C6_RESIDENCY 0x000003fd
+#define MSR_CORE_C7_RESIDENCY 0x000003fe
+#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff
+#define MSR_PKG_C2_RESIDENCY 0x0000060d
+#define MSR_PKG_C8_RESIDENCY 0x00000630
+#define MSR_PKG_C9_RESIDENCY 0x00000631
+#define MSR_PKG_C10_RESIDENCY 0x00000632
+
+/* Interrupt Response Limit */
+#define MSR_PKGC3_IRTL 0x0000060a
+#define MSR_PKGC6_IRTL 0x0000060b
+#define MSR_PKGC7_IRTL 0x0000060c
+#define MSR_PKGC8_IRTL 0x00000633
+#define MSR_PKGC9_IRTL 0x00000634
+#define MSR_PKGC10_IRTL 0x00000635
+
+/* Run Time Average Power Limiting (RAPL) Interface */
+
+#define MSR_RAPL_POWER_UNIT 0x00000606
+
+#define MSR_PKG_POWER_LIMIT 0x00000610
+#define MSR_PKG_ENERGY_STATUS 0x00000611
+#define MSR_PKG_PERF_STATUS 0x00000613
+#define MSR_PKG_POWER_INFO 0x00000614
+
+#define MSR_DRAM_POWER_LIMIT 0x00000618
+#define MSR_DRAM_ENERGY_STATUS 0x00000619
+#define MSR_DRAM_PERF_STATUS 0x0000061b
+#define MSR_DRAM_POWER_INFO 0x0000061c
+
+#define MSR_PP0_POWER_LIMIT 0x00000638
+#define MSR_PP0_ENERGY_STATUS 0x00000639
+#define MSR_PP0_POLICY 0x0000063a
+#define MSR_PP0_PERF_STATUS 0x0000063b
+
+#define MSR_PP1_POWER_LIMIT 0x00000640
+#define MSR_PP1_ENERGY_STATUS 0x00000641
+#define MSR_PP1_POLICY 0x00000642
+
+/* Config TDP MSRs */
+#define MSR_CONFIG_TDP_NOMINAL 0x00000648
+#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
+#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A
+#define MSR_CONFIG_TDP_CONTROL 0x0000064B
+#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C
+
+#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D
+
+#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
+#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
+#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
+#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
+
+#define MSR_CORE_C1_RES 0x00000660
+#define MSR_MODULE_C6_RES_MS 0x00000664
+
+#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668
+#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669
+
+#define MSR_ATOM_CORE_RATIOS 0x0000066a
+#define MSR_ATOM_CORE_VIDS 0x0000066b
+#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c
+#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d
+
+
+#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690
+#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
+#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1
+
+/* Hardware P state interface */
+#define MSR_PPERF 0x0000064e
+#define MSR_PERF_LIMIT_REASONS 0x0000064f
+#define MSR_PM_ENABLE 0x00000770
+#define MSR_HWP_CAPABILITIES 0x00000771
+#define MSR_HWP_REQUEST_PKG 0x00000772
+#define MSR_HWP_INTERRUPT 0x00000773
+#define MSR_HWP_REQUEST 0x00000774
+#define MSR_HWP_STATUS 0x00000777
+
+/* CPUID.6.EAX */
+#define HWP_BASE_BIT (1<<7)
+#define HWP_NOTIFICATIONS_BIT (1<<8)
+#define HWP_ACTIVITY_WINDOW_BIT (1<<9)
+#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10)
+#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11)
+
+/* IA32_HWP_CAPABILITIES */
+#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff)
+#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff)
+#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff)
+#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)
+
+/* IA32_HWP_REQUEST */
+#define HWP_MIN_PERF(x) (x & 0xff)
+#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
+#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
+#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
+#define HWP_EPP_PERFORMANCE 0x00
+#define HWP_EPP_BALANCE_PERFORMANCE 0x80
+#define HWP_EPP_BALANCE_POWERSAVE 0xC0
+#define HWP_EPP_POWERSAVE 0xFF
+#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32)
+#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42)
+
+/* IA32_HWP_STATUS */
+#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
+#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4)
+
+/* IA32_HWP_INTERRUPT */
+#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1)
+#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2)
+
+#define MSR_AMD64_MC0_MASK 0xc0010044
+
+#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
+#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x))
+#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
+#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))
+
+#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x))
+
+/* These are consecutive and not in the normal 4er MCE bank block */
+#define MSR_IA32_MC0_CTL2 0x00000280
+#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))
+
+#define MSR_P6_PERFCTR0 0x000000c1
+#define MSR_P6_PERFCTR1 0x000000c2
+#define MSR_P6_EVNTSEL0 0x00000186
+#define MSR_P6_EVNTSEL1 0x00000187
+
+#define MSR_KNC_PERFCTR0 0x00000020
+#define MSR_KNC_PERFCTR1 0x00000021
+#define MSR_KNC_EVNTSEL0 0x00000028
+#define MSR_KNC_EVNTSEL1 0x00000029
+
+/* Alternative perfctr range with full access. */
+#define MSR_IA32_PMC0 0x000004c1
+
+/* AMD64 MSRs. Not complete. See the architecture manual for a more
+ complete list. */
+
+#define MSR_AMD64_PATCH_LEVEL 0x0000008b
+#define MSR_AMD64_TSC_RATIO 0xc0000104
+#define MSR_AMD64_NB_CFG 0xc001001f
+#define MSR_AMD64_PATCH_LOADER 0xc0010020
+#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
+#define MSR_AMD64_OSVW_STATUS 0xc0010141
+#define MSR_AMD64_LS_CFG 0xc0011020
+#define MSR_AMD64_DC_CFG 0xc0011022
+#define MSR_AMD64_BU_CFG2 0xc001102a
+#define MSR_AMD64_IBSFETCHCTL 0xc0011030
+#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
+#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
+#define MSR_AMD64_IBSFETCH_REG_COUNT 3
+#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
+#define MSR_AMD64_IBSOPCTL 0xc0011033
+#define MSR_AMD64_IBSOPRIP 0xc0011034
+#define MSR_AMD64_IBSOPDATA 0xc0011035
+#define MSR_AMD64_IBSOPDATA2 0xc0011036
+#define MSR_AMD64_IBSOPDATA3 0xc0011037
+#define MSR_AMD64_IBSDCLINAD 0xc0011038
+#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
+#define MSR_AMD64_IBSOP_REG_COUNT 7
+#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
+#define MSR_AMD64_IBSCTL 0xc001103a
+#define MSR_AMD64_IBSBRTARGET 0xc001103b
+#define MSR_AMD64_IBSOPDATA4 0xc001103d
+#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_SEV 0xc0010131
+#define MSR_AMD64_SEV_ENABLED_BIT 0
+#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+
+/* Fam 17h MSRs */
+#define MSR_F17H_IRPERF 0xc00000e9
+
+/* Fam 16h MSRs */
+#define MSR_F16H_L2I_PERF_CTL 0xc0010230
+#define MSR_F16H_L2I_PERF_CTR 0xc0010231
+#define MSR_F16H_DR1_ADDR_MASK 0xc0011019
+#define MSR_F16H_DR2_ADDR_MASK 0xc001101a
+#define MSR_F16H_DR3_ADDR_MASK 0xc001101b
+#define MSR_F16H_DR0_ADDR_MASK 0xc0011027
+
+/* Fam 15h MSRs */
+#define MSR_F15H_PERF_CTL 0xc0010200
+#define MSR_F15H_PERF_CTR 0xc0010201
+#define MSR_F15H_NB_PERF_CTL 0xc0010240
+#define MSR_F15H_NB_PERF_CTR 0xc0010241
+#define MSR_F15H_PTSC 0xc0010280
+#define MSR_F15H_IC_CFG 0xc0011021
+
+/* Fam 10h MSRs */
+#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
+#define FAM10H_MMIO_CONF_ENABLE (1<<0)
+#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
+#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
+#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
+#define FAM10H_MMIO_CONF_BASE_SHIFT 20
+#define MSR_FAM10H_NODE_ID 0xc001100c
+#define MSR_F10H_DECFG 0xc0011029
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
+
+/* K8 MSRs */
+#define MSR_K8_TOP_MEM1 0xc001001a
+#define MSR_K8_TOP_MEM2 0xc001001d
+#define MSR_K8_SYSCFG 0xc0010010
+#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23
+#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_K8_INT_PENDING_MSG 0xc0010055
+/* C1E active bits in int pending message */
+#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
+#define MSR_K8_TSEG_ADDR 0xc0010112
+#define MSR_K8_TSEG_MASK 0xc0010113
+#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
+#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
+#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
+
+/* K7 MSRs */
+#define MSR_K7_EVNTSEL0 0xc0010000
+#define MSR_K7_PERFCTR0 0xc0010004
+#define MSR_K7_EVNTSEL1 0xc0010001
+#define MSR_K7_PERFCTR1 0xc0010005
+#define MSR_K7_EVNTSEL2 0xc0010002
+#define MSR_K7_PERFCTR2 0xc0010006
+#define MSR_K7_EVNTSEL3 0xc0010003
+#define MSR_K7_PERFCTR3 0xc0010007
+#define MSR_K7_CLK_CTL 0xc001001b
+#define MSR_K7_HWCR 0xc0010015
+#define MSR_K7_HWCR_SMMLOCK_BIT 0
+#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
+#define MSR_K7_FID_VID_CTL 0xc0010041
+#define MSR_K7_FID_VID_STATUS 0xc0010042
+
+/* K6 MSRs */
+#define MSR_K6_WHCR 0xc0000082
+#define MSR_K6_UWCCR 0xc0000085
+#define MSR_K6_EPMR 0xc0000086
+#define MSR_K6_PSOR 0xc0000087
+#define MSR_K6_PFIR 0xc0000088
+
+/* Centaur-Hauls/IDT defined MSRs. */
+#define MSR_IDT_FCR1 0x00000107
+#define MSR_IDT_FCR2 0x00000108
+#define MSR_IDT_FCR3 0x00000109
+#define MSR_IDT_FCR4 0x0000010a
+
+#define MSR_IDT_MCR0 0x00000110
+#define MSR_IDT_MCR1 0x00000111
+#define MSR_IDT_MCR2 0x00000112
+#define MSR_IDT_MCR3 0x00000113
+#define MSR_IDT_MCR4 0x00000114
+#define MSR_IDT_MCR5 0x00000115
+#define MSR_IDT_MCR6 0x00000116
+#define MSR_IDT_MCR7 0x00000117
+#define MSR_IDT_MCR_CTRL 0x00000120
+
+/* VIA Cyrix defined MSRs*/
+#define MSR_VIA_FCR 0x00001107
+#define MSR_VIA_LONGHAUL 0x0000110a
+#define MSR_VIA_RNG 0x0000110b
+#define MSR_VIA_BCR2 0x00001147
+
+/* Transmeta defined MSRs */
+#define MSR_TMTA_LONGRUN_CTRL 0x80868010
+#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
+#define MSR_TMTA_LRTI_READOUT 0x80868018
+#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a
+
+/* Intel defined MSRs. */
+#define MSR_IA32_P5_MC_ADDR 0x00000000
+#define MSR_IA32_P5_MC_TYPE 0x00000001
+#define MSR_IA32_TSC 0x00000010
+#define MSR_IA32_PLATFORM_ID 0x00000017
+#define MSR_IA32_EBL_CR_POWERON 0x0000002a
+#define MSR_EBC_FREQUENCY_ID 0x0000002c
+#define MSR_SMI_COUNT 0x00000034
+#define MSR_IA32_FEATURE_CONTROL 0x0000003a
+#define MSR_IA32_TSC_ADJUST 0x0000003b
+#define MSR_IA32_BNDCFGS 0x00000d90
+
+#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
+
+#define MSR_IA32_XSS 0x00000da0
+
+#define FEATURE_CONTROL_LOCKED (1<<0)
+#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
+#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
+#define FEATURE_CONTROL_LMCE (1<<20)
+
+#define MSR_IA32_APICBASE 0x0000001b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+
+#define MSR_IA32_TSCDEADLINE 0x000006e0
+
+#define MSR_IA32_UCODE_WRITE 0x00000079
+#define MSR_IA32_UCODE_REV 0x0000008b
+
+#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b
+#define MSR_IA32_SMBASE 0x0000009e
+
+#define MSR_IA32_PERF_STATUS 0x00000198
+#define MSR_IA32_PERF_CTL 0x00000199
+#define INTEL_PERF_CTL_MASK 0xffff
+#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
+#define MSR_AMD_PERF_STATUS 0xc0010063
+#define MSR_AMD_PERF_CTL 0xc0010062
+
+#define MSR_IA32_MPERF 0x000000e7
+#define MSR_IA32_APERF 0x000000e8
+
+#define MSR_IA32_THERM_CONTROL 0x0000019a
+#define MSR_IA32_THERM_INTERRUPT 0x0000019b
+
+#define THERM_INT_HIGH_ENABLE (1 << 0)
+#define THERM_INT_LOW_ENABLE (1 << 1)
+#define THERM_INT_PLN_ENABLE (1 << 24)
+
+#define MSR_IA32_THERM_STATUS 0x0000019c
+
+#define THERM_STATUS_PROCHOT (1 << 0)
+#define THERM_STATUS_POWER_LIMIT (1 << 10)
+
+#define MSR_THERM2_CTL 0x0000019d
+
+#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16)
+
+#define MSR_IA32_MISC_ENABLE 0x000001a0
+
+#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2
+
+#define MSR_MISC_FEATURE_CONTROL 0x000001a4
+#define MSR_MISC_PWR_MGMT 0x000001aa
+
+#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
+#define ENERGY_PERF_BIAS_PERFORMANCE 0
+#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4
+#define ENERGY_PERF_BIAS_NORMAL 6
+#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8
+#define ENERGY_PERF_BIAS_POWERSAVE 15
+
+#define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1
+
+#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0)
+#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10)
+
+#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2
+
+#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0)
+#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
+#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
+
+/* Thermal Thresholds Support */
+#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
+#define THERM_SHIFT_THRESHOLD0 8
+#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0)
+#define THERM_INT_THRESHOLD1_ENABLE (1 << 23)
+#define THERM_SHIFT_THRESHOLD1 16
+#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1)
+#define THERM_STATUS_THRESHOLD0 (1 << 6)
+#define THERM_LOG_THRESHOLD0 (1 << 7)
+#define THERM_STATUS_THRESHOLD1 (1 << 8)
+#define THERM_LOG_THRESHOLD1 (1 << 9)
+
+/* MISC_ENABLE bits: architectural */
+#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0
+#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)
+#define MSR_IA32_MISC_ENABLE_TCC_BIT 1
+#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT)
+#define MSR_IA32_MISC_ENABLE_EMON_BIT 7
+#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT)
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT)
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT)
+#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16
+#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT)
+#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18
+#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
+#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23
+#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34
+#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT)
+
+/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
+#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2
+#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT)
+#define MSR_IA32_MISC_ENABLE_TM1_BIT 3
+#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT)
+#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4
+#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6
+#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8
+#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT)
+#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9
+#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_FERR_BIT 10
+#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT)
+#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10
+#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT)
+#define MSR_IA32_MISC_ENABLE_TM2_BIT 13
+#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT)
+#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19
+#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20
+#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT)
+#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24
+#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT)
+#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37
+#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38
+#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
+#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
+
+/* MISC_FEATURES_ENABLES non-architectural features */
+#define MSR_MISC_FEATURES_ENABLES 0x00000140
+
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
+#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1
+
+#define MSR_IA32_TSC_DEADLINE 0x000006E0
+
+/* P4/Xeon+ specific */
+#define MSR_IA32_MCG_EAX 0x00000180
+#define MSR_IA32_MCG_EBX 0x00000181
+#define MSR_IA32_MCG_ECX 0x00000182
+#define MSR_IA32_MCG_EDX 0x00000183
+#define MSR_IA32_MCG_ESI 0x00000184
+#define MSR_IA32_MCG_EDI 0x00000185
+#define MSR_IA32_MCG_EBP 0x00000186
+#define MSR_IA32_MCG_ESP 0x00000187
+#define MSR_IA32_MCG_EFLAGS 0x00000188
+#define MSR_IA32_MCG_EIP 0x00000189
+#define MSR_IA32_MCG_RESERVED 0x0000018a
+
+/* Pentium IV performance counter MSRs */
+#define MSR_P4_BPU_PERFCTR0 0x00000300
+#define MSR_P4_BPU_PERFCTR1 0x00000301
+#define MSR_P4_BPU_PERFCTR2 0x00000302
+#define MSR_P4_BPU_PERFCTR3 0x00000303
+#define MSR_P4_MS_PERFCTR0 0x00000304
+#define MSR_P4_MS_PERFCTR1 0x00000305
+#define MSR_P4_MS_PERFCTR2 0x00000306
+#define MSR_P4_MS_PERFCTR3 0x00000307
+#define MSR_P4_FLAME_PERFCTR0 0x00000308
+#define MSR_P4_FLAME_PERFCTR1 0x00000309
+#define MSR_P4_FLAME_PERFCTR2 0x0000030a
+#define MSR_P4_FLAME_PERFCTR3 0x0000030b
+#define MSR_P4_IQ_PERFCTR0 0x0000030c
+#define MSR_P4_IQ_PERFCTR1 0x0000030d
+#define MSR_P4_IQ_PERFCTR2 0x0000030e
+#define MSR_P4_IQ_PERFCTR3 0x0000030f
+#define MSR_P4_IQ_PERFCTR4 0x00000310
+#define MSR_P4_IQ_PERFCTR5 0x00000311
+#define MSR_P4_BPU_CCCR0 0x00000360
+#define MSR_P4_BPU_CCCR1 0x00000361
+#define MSR_P4_BPU_CCCR2 0x00000362
+#define MSR_P4_BPU_CCCR3 0x00000363
+#define MSR_P4_MS_CCCR0 0x00000364
+#define MSR_P4_MS_CCCR1 0x00000365
+#define MSR_P4_MS_CCCR2 0x00000366
+#define MSR_P4_MS_CCCR3 0x00000367
+#define MSR_P4_FLAME_CCCR0 0x00000368
+#define MSR_P4_FLAME_CCCR1 0x00000369
+#define MSR_P4_FLAME_CCCR2 0x0000036a
+#define MSR_P4_FLAME_CCCR3 0x0000036b
+#define MSR_P4_IQ_CCCR0 0x0000036c
+#define MSR_P4_IQ_CCCR1 0x0000036d
+#define MSR_P4_IQ_CCCR2 0x0000036e
+#define MSR_P4_IQ_CCCR3 0x0000036f
+#define MSR_P4_IQ_CCCR4 0x00000370
+#define MSR_P4_IQ_CCCR5 0x00000371
+#define MSR_P4_ALF_ESCR0 0x000003ca
+#define MSR_P4_ALF_ESCR1 0x000003cb
+#define MSR_P4_BPU_ESCR0 0x000003b2
+#define MSR_P4_BPU_ESCR1 0x000003b3
+#define MSR_P4_BSU_ESCR0 0x000003a0
+#define MSR_P4_BSU_ESCR1 0x000003a1
+#define MSR_P4_CRU_ESCR0 0x000003b8
+#define MSR_P4_CRU_ESCR1 0x000003b9
+#define MSR_P4_CRU_ESCR2 0x000003cc
+#define MSR_P4_CRU_ESCR3 0x000003cd
+#define MSR_P4_CRU_ESCR4 0x000003e0
+#define MSR_P4_CRU_ESCR5 0x000003e1
+#define MSR_P4_DAC_ESCR0 0x000003a8
+#define MSR_P4_DAC_ESCR1 0x000003a9
+#define MSR_P4_FIRM_ESCR0 0x000003a4
+#define MSR_P4_FIRM_ESCR1 0x000003a5
+#define MSR_P4_FLAME_ESCR0 0x000003a6
+#define MSR_P4_FLAME_ESCR1 0x000003a7
+#define MSR_P4_FSB_ESCR0 0x000003a2
+#define MSR_P4_FSB_ESCR1 0x000003a3
+#define MSR_P4_IQ_ESCR0 0x000003ba
+#define MSR_P4_IQ_ESCR1 0x000003bb
+#define MSR_P4_IS_ESCR0 0x000003b4
+#define MSR_P4_IS_ESCR1 0x000003b5
+#define MSR_P4_ITLB_ESCR0 0x000003b6
+#define MSR_P4_ITLB_ESCR1 0x000003b7
+#define MSR_P4_IX_ESCR0 0x000003c8
+#define MSR_P4_IX_ESCR1 0x000003c9
+#define MSR_P4_MOB_ESCR0 0x000003aa
+#define MSR_P4_MOB_ESCR1 0x000003ab
+#define MSR_P4_MS_ESCR0 0x000003c0
+#define MSR_P4_MS_ESCR1 0x000003c1
+#define MSR_P4_PMH_ESCR0 0x000003ac
+#define MSR_P4_PMH_ESCR1 0x000003ad
+#define MSR_P4_RAT_ESCR0 0x000003bc
+#define MSR_P4_RAT_ESCR1 0x000003bd
+#define MSR_P4_SAAT_ESCR0 0x000003ae
+#define MSR_P4_SAAT_ESCR1 0x000003af
+#define MSR_P4_SSU_ESCR0 0x000003be
+#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */
+
+#define MSR_P4_TBPU_ESCR0 0x000003c2
+#define MSR_P4_TBPU_ESCR1 0x000003c3
+#define MSR_P4_TC_ESCR0 0x000003c4
+#define MSR_P4_TC_ESCR1 0x000003c5
+#define MSR_P4_U2L_ESCR0 0x000003b0
+#define MSR_P4_U2L_ESCR1 0x000003b1
+
+#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2
+
+/* Intel Core-based CPU performance counters */
+#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
+#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
+#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
+#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
+#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
+#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
+
+/* Geode defined MSRs */
+#define MSR_GEODE_BUSCONT_CONF0 0x00001900
+
+/* Intel VT MSRs */
+#define MSR_IA32_VMX_BASIC 0x00000480
+#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481
+#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482
+#define MSR_IA32_VMX_EXIT_CTLS 0x00000483
+#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484
+#define MSR_IA32_VMX_MISC 0x00000485
+#define MSR_IA32_VMX_CR0_FIXED0 0x00000486
+#define MSR_IA32_VMX_CR0_FIXED1 0x00000487
+#define MSR_IA32_VMX_CR4_FIXED0 0x00000488
+#define MSR_IA32_VMX_CR4_FIXED1 0x00000489
+#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a
+#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
+#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
+#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d
+#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
+#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
+#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
+#define MSR_IA32_VMX_VMFUNC 0x00000491
+
+/* VMX_BASIC bits and bitmasks */
+#define VMX_BASIC_VMCS_SIZE_SHIFT 32
+#define VMX_BASIC_TRUE_CTLS (1ULL << 55)
+#define VMX_BASIC_64 0x0001000000000000LLU
+#define VMX_BASIC_MEM_TYPE_SHIFT 50
+#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
+#define VMX_BASIC_MEM_TYPE_WB 6LLU
+#define VMX_BASIC_INOUT 0x0040000000000000LLU
+
+/* MSR_IA32_VMX_MISC bits */
+#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
+/* AMD-V MSRs */
+
+#define MSR_VM_CR 0xc0010114
+#define MSR_VM_IGNNE 0xc0010115
+#define MSR_VM_HSAVE_PA 0xc0010117
+
+#endif /* !SELFTEST_KVM_X86_H */
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
new file mode 100644
index 000000000000..cd01144d27c8
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -0,0 +1,92 @@
+/*
+ * tools/testing/selftests/kvm/lib/assert.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/
+
+#include "test_util.h"
+
+#include <execinfo.h>
+#include <sys/syscall.h>
+
+#include "../../kselftest.h"
+
+/* Dumps the current stack trace to stderr. */
+static void __attribute__((noinline)) test_dump_stack(void);
+static void test_dump_stack(void)
+{
+ /*
+ * Build and run this command:
+ *
+ * addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \
+ * grep -v test_dump_stack | cat -n 1>&2
+ *
+ * Note that the spacing is different and there's no newline.
+ */
+ size_t i;
+ size_t n = 20;
+ void *stack[n];
+ const char *addr2line = "addr2line -s -e /proc/$PPID/exe -fpai";
+ const char *pipeline = "|cat -n 1>&2";
+ char cmd[strlen(addr2line) + strlen(pipeline) +
+ /* N bytes per addr * 2 digits per byte + 1 space per addr: */
+ n * (((sizeof(void *)) * 2) + 1) +
+ /* Null terminator: */
+ 1];
+ char *c;
+
+ n = backtrace(stack, n);
+ c = &cmd[0];
+ c += sprintf(c, "%s", addr2line);
+ /*
+ * Skip the first 3 frames: backtrace, test_dump_stack, and
+ * test_assert. We hope that backtrace isn't inlined and the other two
+ * we've declared noinline.
+ */
+ for (i = 2; i < n; i++)
+ c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1);
+ c += sprintf(c, "%s", pipeline);
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-result"
+ system(cmd);
+#pragma GCC diagnostic pop
+}
+
+static pid_t gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+
+void __attribute__((noinline))
+test_assert(bool exp, const char *exp_str,
+ const char *file, unsigned int line, const char *fmt, ...)
+{
+ va_list ap;
+
+ if (!(exp)) {
+ va_start(ap, fmt);
+
+ fprintf(stderr, "==== Test Assertion Failure ====\n"
+ " %s:%u: %s\n"
+ " pid=%d tid=%d - %s\n",
+ file, line, exp_str, getpid(), gettid(),
+ strerror(errno));
+ test_dump_stack();
+ if (fmt) {
+ fputs(" ", stderr);
+ vfprintf(stderr, fmt, ap);
+ fputs("\n", stderr);
+ }
+ va_end(ap);
+
+ if (errno == EACCES)
+ ksft_exit_skip("Access denied - Exiting.\n");
+ exit(254);
+ }
+
+ return;
+}
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
new file mode 100644
index 000000000000..5eb857584aa3
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -0,0 +1,197 @@
+/*
+ * tools/testing/selftests/kvm/lib/elf.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+
+#include <bits/endian.h>
+#include <linux/elf.h>
+
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
+{
+ off_t offset_rv;
+
+ /* Open the ELF file. */
+ int fd;
+ fd = open(filename, O_RDONLY);
+ TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+ " filename: %s\n"
+ " rv: %i errno: %i", filename, fd, errno);
+
+ /* Read in and validate ELF Identification Record.
+ * The ELF Identification record is the first 16 (EI_NIDENT) bytes
+ * of the ELF header, which is at the beginning of the ELF file.
+ * For now it is only safe to read the first EI_NIDENT bytes. Once
+ * read and validated, the value of e_ehsize can be used to determine
+ * the real size of the ELF header.
+ */
+ unsigned char ident[EI_NIDENT];
+ test_read(fd, ident, sizeof(ident));
+ TEST_ASSERT((ident[EI_MAG0] == ELFMAG0) && (ident[EI_MAG1] == ELFMAG1)
+ && (ident[EI_MAG2] == ELFMAG2) && (ident[EI_MAG3] == ELFMAG3),
+ "ELF MAGIC Mismatch,\n"
+ " filename: %s\n"
+ " ident[EI_MAG0 - EI_MAG3]: %02x %02x %02x %02x\n"
+ " Expected: %02x %02x %02x %02x",
+ filename,
+ ident[EI_MAG0], ident[EI_MAG1], ident[EI_MAG2], ident[EI_MAG3],
+ ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3);
+ TEST_ASSERT(ident[EI_CLASS] == ELFCLASS64,
+ "Current implementation only able to handle ELFCLASS64,\n"
+ " filename: %s\n"
+ " ident[EI_CLASS]: %02x\n"
+ " expected: %02x",
+ filename,
+ ident[EI_CLASS], ELFCLASS64);
+ TEST_ASSERT(((BYTE_ORDER == LITTLE_ENDIAN)
+ && (ident[EI_DATA] == ELFDATA2LSB))
+ || ((BYTE_ORDER == BIG_ENDIAN)
+ && (ident[EI_DATA] == ELFDATA2MSB)), "Current "
+ "implementation only able to handle\n"
+ "cases where the host and ELF file endianness\n"
+ "is the same:\n"
+ " host BYTE_ORDER: %u\n"
+ " host LITTLE_ENDIAN: %u\n"
+ " host BIG_ENDIAN: %u\n"
+ " ident[EI_DATA]: %u\n"
+ " ELFDATA2LSB: %u\n"
+ " ELFDATA2MSB: %u",
+ BYTE_ORDER, LITTLE_ENDIAN, BIG_ENDIAN,
+ ident[EI_DATA], ELFDATA2LSB, ELFDATA2MSB);
+ TEST_ASSERT(ident[EI_VERSION] == EV_CURRENT,
+ "Current implementation only able to handle current "
+ "ELF version,\n"
+ " filename: %s\n"
+ " ident[EI_VERSION]: %02x\n"
+ " expected: %02x",
+ filename, ident[EI_VERSION], EV_CURRENT);
+
+ /* Read in the ELF header.
+ * With the ELF Identification portion of the ELF header
+ * validated, especially that the value at EI_VERSION is
+ * as expected, it is now safe to read the entire ELF header.
+ */
+ offset_rv = lseek(fd, 0, SEEK_SET);
+ TEST_ASSERT(offset_rv == 0, "Seek to ELF header failed,\n"
+ " rv: %zi expected: %i", offset_rv, 0);
+ test_read(fd, hdrp, sizeof(*hdrp));
+ TEST_ASSERT(hdrp->e_phentsize == sizeof(Elf64_Phdr),
+ "Unexpected physical header size,\n"
+ " hdrp->e_phentsize: %x\n"
+ " expected: %zx",
+ hdrp->e_phentsize, sizeof(Elf64_Phdr));
+ TEST_ASSERT(hdrp->e_shentsize == sizeof(Elf64_Shdr),
+ "Unexpected section header size,\n"
+ " hdrp->e_shentsize: %x\n"
+ " expected: %zx",
+ hdrp->e_shentsize, sizeof(Elf64_Shdr));
+}
+
+/* VM ELF Load
+ *
+ * Input Args:
+ * filename - Path to ELF file
+ *
+ * Output Args: None
+ *
+ * Input/Output Args:
+ * vm - Pointer to opaque type that describes the VM.
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Loads the program image of the ELF file specified by filename,
+ * into the virtual address space of the VM pointed to by vm. On entry
+ * the VM needs to not be using any of the virtual address space used
+ * by the image and it needs to have sufficient available physical pages, to
+ * back the virtual pages used to load the image.
+ */
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
+ uint32_t data_memslot, uint32_t pgd_memslot)
+{
+ off_t offset, offset_rv;
+ Elf64_Ehdr hdr;
+
+ /* Open the ELF file. */
+ int fd;
+ fd = open(filename, O_RDONLY);
+ TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+ " filename: %s\n"
+ " rv: %i errno: %i", filename, fd, errno);
+
+ /* Read in the ELF header. */
+ elfhdr_get(filename, &hdr);
+
+ /* For each program header.
+ * The following ELF header members specify the location
+ * and size of the program headers:
+ *
+ * e_phoff - File offset to start of program headers
+ * e_phentsize - Size of each program header
+ * e_phnum - Number of program header entries
+ */
+ for (unsigned int n1 = 0; n1 < hdr.e_phnum; n1++) {
+ /* Seek to the beginning of the program header. */
+ offset = hdr.e_phoff + (n1 * hdr.e_phentsize);
+ offset_rv = lseek(fd, offset, SEEK_SET);
+ TEST_ASSERT(offset_rv == offset,
+ "Failed to seek to begining of program header %u,\n"
+ " filename: %s\n"
+ " rv: %jd errno: %i",
+ n1, filename, (intmax_t) offset_rv, errno);
+
+ /* Read in the program header. */
+ Elf64_Phdr phdr;
+ test_read(fd, &phdr, sizeof(phdr));
+
+ /* Skip if this header doesn't describe a loadable segment. */
+ if (phdr.p_type != PT_LOAD)
+ continue;
+
+ /* Allocate memory for this segment within the VM. */
+ TEST_ASSERT(phdr.p_memsz > 0, "Unexpected loadable segment "
+ "memsize of 0,\n"
+ " phdr index: %u p_memsz: 0x%" PRIx64,
+ n1, (uint64_t) phdr.p_memsz);
+ vm_vaddr_t seg_vstart = phdr.p_vaddr;
+ seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1);
+ vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
+ seg_vend |= vm->page_size - 1;
+ size_t seg_size = seg_vend - seg_vstart + 1;
+
+ vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
+ data_memslot, pgd_memslot);
+ TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
+ "virtual memory for segment at requested min addr,\n"
+ " segment idx: %u\n"
+ " seg_vstart: 0x%lx\n"
+ " vaddr: 0x%lx",
+ n1, seg_vstart, vaddr);
+ memset(addr_gva2hva(vm, vaddr), 0, seg_size);
+ /* TODO(lhuemill): Set permissions of each memory segment
+ * based on the least-significant 3 bits of phdr.p_flags.
+ */
+
+ /* Load portion of initial state that is contained within
+ * the ELF file.
+ */
+ if (phdr.p_filesz) {
+ offset_rv = lseek(fd, phdr.p_offset, SEEK_SET);
+ TEST_ASSERT(offset_rv == phdr.p_offset,
+ "Seek to program segment offset failed,\n"
+ " program header idx: %u errno: %i\n"
+ " offset_rv: 0x%jx\n"
+ " expected: 0x%jx\n",
+ n1, errno, (intmax_t) offset_rv,
+ (intmax_t) phdr.p_offset);
+ test_read(fd, addr_gva2hva(vm, phdr.p_vaddr),
+ phdr.p_filesz);
+ }
+ }
+}
diff --git a/tools/testing/selftests/kvm/lib/io.c b/tools/testing/selftests/kvm/lib/io.c
new file mode 100644
index 000000000000..cff869ffe6ee
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/io.c
@@ -0,0 +1,158 @@
+/*
+ * tools/testing/selftests/kvm/lib/io.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+
+/* Test Write
+ *
+ * A wrapper for write(2), that automatically handles the following
+ * special conditions:
+ *
+ * + Interrupted system call (EINTR)
+ * + Write of less than requested amount
+ * + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional write is performed to automatically
+ * continue writing the requested data.
+ * There are also many cases where write(2) can return an unexpected
+ * error (e.g. EIO). Such errors cause a TEST_ASSERT failure.
+ *
+ * Note, for function signature compatibility with write(2), this function
+ * returns the number of bytes written, but that value will always be equal
+ * to the number of requested bytes. All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * write(2) or cause a TEST_ASSERT failure.
+ *
+ * Args:
+ * fd - Opened file descriptor to file to be written.
+ * count - Number of bytes to write.
+ *
+ * Output:
+ * buf - Starting address of data to be written.
+ *
+ * Return:
+ * On success, number of bytes written.
+ * On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_write(int fd, const void *buf, size_t count)
+{
+ ssize_t rc;
+ ssize_t num_written = 0;
+ size_t num_left = count;
+ const char *ptr = buf;
+
+ /* Note: Count of zero is allowed (see "RETURN VALUE" portion of
+ * write(2) manpage for details.
+ */
+ TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count);
+
+ do {
+ rc = write(fd, ptr, num_left);
+
+ switch (rc) {
+ case -1:
+ TEST_ASSERT(errno == EAGAIN || errno == EINTR,
+ "Unexpected write failure,\n"
+ " rc: %zi errno: %i", rc, errno);
+ continue;
+
+ case 0:
+ TEST_ASSERT(false, "Unexpected EOF,\n"
+ " rc: %zi num_written: %zi num_left: %zu",
+ rc, num_written, num_left);
+ break;
+
+ default:
+ TEST_ASSERT(rc >= 0, "Unexpected ret from write,\n"
+ " rc: %zi errno: %i", rc, errno);
+ num_written += rc;
+ num_left -= rc;
+ ptr += rc;
+ break;
+ }
+ } while (num_written < count);
+
+ return num_written;
+}
+
+/* Test Read
+ *
+ * A wrapper for read(2), that automatically handles the following
+ * special conditions:
+ *
+ * + Interrupted system call (EINTR)
+ * + Read of less than requested amount
+ * + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional read is performed to automatically
+ * continue reading the requested data.
+ * There are also many cases where read(2) can return an unexpected
+ * error (e.g. EIO). Such errors cause a TEST_ASSERT failure. Note,
+ * it is expected that the file opened by fd at the current file position
+ * contains at least the number of requested bytes to be read. A TEST_ASSERT
+ * failure is produced if an End-Of-File condition occurs, before all the
+ * data is read. It is the callers responsibility to assure that sufficient
+ * data exists.
+ *
+ * Note, for function signature compatibility with read(2), this function
+ * returns the number of bytes read, but that value will always be equal
+ * to the number of requested bytes. All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * read(2) or cause a TEST_ASSERT failure.
+ *
+ * Args:
+ * fd - Opened file descriptor to file to be read.
+ * count - Number of bytes to read.
+ *
+ * Output:
+ * buf - Starting address of where to write the bytes read.
+ *
+ * Return:
+ * On success, number of bytes read.
+ * On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_read(int fd, void *buf, size_t count)
+{
+ ssize_t rc;
+ ssize_t num_read = 0;
+ size_t num_left = count;
+ char *ptr = buf;
+
+ /* Note: Count of zero is allowed (see "If count is zero" portion of
+ * read(2) manpage for details.
+ */
+ TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count);
+
+ do {
+ rc = read(fd, ptr, num_left);
+
+ switch (rc) {
+ case -1:
+ TEST_ASSERT(errno == EAGAIN || errno == EINTR,
+ "Unexpected read failure,\n"
+ " rc: %zi errno: %i", rc, errno);
+ break;
+
+ case 0:
+ TEST_ASSERT(false, "Unexpected EOF,\n"
+ " rc: %zi num_read: %zi num_left: %zu",
+ rc, num_read, num_left);
+ break;
+
+ default:
+ TEST_ASSERT(rc > 0, "Unexpected ret from read,\n"
+ " rc: %zi errno: %i", rc, errno);
+ num_read += rc;
+ num_left -= rc;
+ ptr += rc;
+ break;
+ }
+ } while (num_read < count);
+
+ return num_read;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
new file mode 100644
index 000000000000..37e2a787d2fc
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -0,0 +1,1486 @@
+/*
+ * tools/testing/selftests/kvm/lib/kvm_util.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+#include <assert.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#define KVM_DEV_PATH "/dev/kvm"
+
+#define KVM_UTIL_PGS_PER_HUGEPG 512
+#define KVM_UTIL_MIN_PADDR 0x2000
+
+/* Aligns x up to the next multiple of size. Size must be a power of 2. */
+static void *align(void *x, size_t size)
+{
+ size_t mask = size - 1;
+ TEST_ASSERT(size != 0 && !(size & (size - 1)),
+ "size not a power of 2: %lu", size);
+ return (void *) (((size_t) x + mask) & ~mask);
+}
+
+/* Capability
+ *
+ * Input Args:
+ * cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return:
+ * On success, the Value corresponding to the capability (KVM_CAP_*)
+ * specified by the value of cap. On failure a TEST_ASSERT failure
+ * is produced.
+ *
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+int kvm_check_cap(long cap)
+{
+ int ret;
+ int kvm_fd;
+
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
+ TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
+ " rc: %i errno: %i", ret, errno);
+
+ close(kvm_fd);
+
+ return ret;
+}
+
+/* VM Create
+ *
+ * Input Args:
+ * mode - VM Mode (e.g. VM_MODE_FLAT48PG)
+ * phy_pages - Physical memory pages
+ * perm - permission
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ *
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG).
+ * When phy_pages is non-zero, a memory region of phy_pages physical pages
+ * is created and mapped starting at guest physical address 0. The file
+ * descriptor to control the created VM is created with the permissions
+ * given by perm (e.g. O_RDWR).
+ */
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+{
+ struct kvm_vm *vm;
+ int kvm_fd;
+
+ /* Allocate memory. */
+ vm = calloc(1, sizeof(*vm));
+ TEST_ASSERT(vm != NULL, "Insufficent Memory");
+
+ vm->mode = mode;
+ kvm_fd = open(KVM_DEV_PATH, perm);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ /* Create VM. */
+ vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL);
+ TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
+ "rc: %i errno: %i", vm->fd, errno);
+
+ close(kvm_fd);
+
+ /* Setup mode specific traits. */
+ switch (vm->mode) {
+ case VM_MODE_FLAT48PG:
+ vm->page_size = 0x1000;
+ vm->page_shift = 12;
+
+ /* Limit to 48-bit canonical virtual addresses. */
+ vm->vpages_valid = sparsebit_alloc();
+ sparsebit_set_num(vm->vpages_valid,
+ 0, (1ULL << (48 - 1)) >> vm->page_shift);
+ sparsebit_set_num(vm->vpages_valid,
+ (~((1ULL << (48 - 1)) - 1)) >> vm->page_shift,
+ (1ULL << (48 - 1)) >> vm->page_shift);
+
+ /* Limit physical addresses to 52-bits. */
+ vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1;
+ break;
+
+ default:
+ TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
+ }
+
+ /* Allocate and setup memory for guest. */
+ vm->vpages_mapped = sparsebit_alloc();
+ if (phy_pages != 0)
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ 0, 0, phy_pages, 0);
+
+ return vm;
+}
+
+/* Userspace Memory Region Find
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * start - Starting VM physical address
+ * end - Ending VM physical address, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to overlapping region, NULL if no such region.
+ *
+ * Searches for a region with any physical memory that overlaps with
+ * any portion of the guest physical addresses from start to end
+ * inclusive. If multiple overlapping regions exist, a pointer to any
+ * of the regions is returned. Null is returned only when no overlapping
+ * region exists.
+ */
+static struct userspace_mem_region *userspace_mem_region_find(
+ struct kvm_vm *vm, uint64_t start, uint64_t end)
+{
+ struct userspace_mem_region *region;
+
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ uint64_t existing_start = region->region.guest_phys_addr;
+ uint64_t existing_end = region->region.guest_phys_addr
+ + region->region.memory_size - 1;
+ if (start <= existing_end && end >= existing_start)
+ return region;
+ }
+
+ return NULL;
+}
+
+/* KVM Userspace Memory Region Find
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * start - Starting VM physical address
+ * end - Ending VM physical address, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to overlapping region, NULL if no such region.
+ *
+ * Public interface to userspace_mem_region_find. Allows tests to look up
+ * the memslot datastructure for a given range of guest physical memory.
+ */
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+ uint64_t end)
+{
+ struct userspace_mem_region *region;
+
+ region = userspace_mem_region_find(vm, start, end);
+ if (!region)
+ return NULL;
+
+ return &region->region;
+}
+
+/* VCPU Find
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to VCPU structure
+ *
+ * Locates a vcpu structure that describes the VCPU specified by vcpuid and
+ * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU
+ * for the specified vcpuid.
+ */
+struct vcpu *vcpu_find(struct kvm_vm *vm,
+ uint32_t vcpuid)
+{
+ struct vcpu *vcpup;
+
+ for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) {
+ if (vcpup->id == vcpuid)
+ return vcpup;
+ }
+
+ return NULL;
+}
+
+/* VM VCPU Remove
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Within the VM specified by vm, removes the VCPU given by vcpuid.
+ */
+static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+ int ret = close(vcpu->fd);
+ TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
+ "errno: %i", ret, errno);
+
+ if (vcpu->next)
+ vcpu->next->prev = vcpu->prev;
+ if (vcpu->prev)
+ vcpu->prev->next = vcpu->next;
+ else
+ vm->vcpu_head = vcpu->next;
+ free(vcpu);
+}
+
+
+/* Destroys and frees the VM pointed to by vmp.
+ */
+void kvm_vm_free(struct kvm_vm *vmp)
+{
+ int ret;
+
+ if (vmp == NULL)
+ return;
+
+ /* Free userspace_mem_regions. */
+ while (vmp->userspace_mem_region_head) {
+ struct userspace_mem_region *region
+ = vmp->userspace_mem_region_head;
+
+ region->region.memory_size = 0;
+ ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION,
+ &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+
+ vmp->userspace_mem_region_head = region->next;
+ sparsebit_free(&region->unused_phy_pages);
+ ret = munmap(region->mmap_start, region->mmap_size);
+ TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i",
+ ret, errno);
+
+ free(region);
+ }
+
+ /* Free VCPUs. */
+ while (vmp->vcpu_head)
+ vm_vcpu_rm(vmp, vmp->vcpu_head->id);
+
+ /* Free sparsebit arrays. */
+ sparsebit_free(&vmp->vpages_valid);
+ sparsebit_free(&vmp->vpages_mapped);
+
+ /* Close file descriptor for the VM. */
+ ret = close(vmp->fd);
+ TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
+ " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
+
+ /* Free the structure describing the VM. */
+ free(vmp);
+}
+
+/* Memory Compare, host virtual to guest virtual
+ *
+ * Input Args:
+ * hva - Starting host virtual address
+ * vm - Virtual Machine
+ * gva - Starting guest virtual address
+ * len - number of bytes to compare
+ *
+ * Output Args: None
+ *
+ * Input/Output Args: None
+ *
+ * Return:
+ * Returns 0 if the bytes starting at hva for a length of len
+ * are equal the guest virtual bytes starting at gva. Returns
+ * a value < 0, if bytes at hva are less than those at gva.
+ * Otherwise a value > 0 is returned.
+ *
+ * Compares the bytes starting at the host virtual address hva, for
+ * a length of len, to the guest bytes starting at the guest virtual
+ * address given by gva.
+ */
+int kvm_memcmp_hva_gva(void *hva,
+ struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
+{
+ size_t amt;
+
+ /* Compare a batch of bytes until either a match is found
+ * or all the bytes have been compared.
+ */
+ for (uintptr_t offset = 0; offset < len; offset += amt) {
+ uintptr_t ptr1 = (uintptr_t)hva + offset;
+
+ /* Determine host address for guest virtual address
+ * at offset.
+ */
+ uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
+
+ /* Determine amount to compare on this pass.
+ * Don't allow the comparsion to cross a page boundary.
+ */
+ amt = len - offset;
+ if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
+ amt = vm->page_size - (ptr1 % vm->page_size);
+ if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
+ amt = vm->page_size - (ptr2 % vm->page_size);
+
+ assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
+ assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
+
+ /* Perform the comparison. If there is a difference
+ * return that result to the caller, otherwise need
+ * to continue on looking for a mismatch.
+ */
+ int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
+ if (ret != 0)
+ return ret;
+ }
+
+ /* No mismatch found. Let the caller know the two memory
+ * areas are equal.
+ */
+ return 0;
+}
+
+/* Allocate an instance of struct kvm_cpuid2
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the allocated struct. The caller is responsible
+ * for freeing this struct.
+ *
+ * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
+ * array to be decided at allocation time, allocation is slightly
+ * complicated. This function uses a reasonable default length for
+ * the array and performs the appropriate allocation.
+ */
+static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
+{
+ struct kvm_cpuid2 *cpuid;
+ int nent = 100;
+ size_t size;
+
+ size = sizeof(*cpuid);
+ size += nent * sizeof(struct kvm_cpuid_entry2);
+ cpuid = malloc(size);
+ if (!cpuid) {
+ perror("malloc");
+ abort();
+ }
+
+ cpuid->nent = nent;
+
+ return cpuid;
+}
+
+/* KVM Supported CPUID Get
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ *
+ * Return: The supported KVM CPUID
+ *
+ * Get the guest CPUID supported by KVM.
+ */
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+{
+ static struct kvm_cpuid2 *cpuid;
+ int ret;
+ int kvm_fd;
+
+ if (cpuid)
+ return cpuid;
+
+ cpuid = allocate_kvm_cpuid2();
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
+ TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
+ ret, errno);
+
+ close(kvm_fd);
+ return cpuid;
+}
+
+/* Locate a cpuid entry.
+ *
+ * Input Args:
+ * cpuid: The cpuid.
+ * function: The function of the cpuid entry to find.
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the cpuid entry. Never returns NULL.
+ */
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
+{
+ struct kvm_cpuid2 *cpuid;
+ struct kvm_cpuid_entry2 *entry = NULL;
+ int i;
+
+ cpuid = kvm_get_supported_cpuid();
+ for (i = 0; i < cpuid->nent; i++) {
+ if (cpuid->entries[i].function == function &&
+ cpuid->entries[i].index == index) {
+ entry = &cpuid->entries[i];
+ break;
+ }
+ }
+
+ TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
+ function, index);
+ return entry;
+}
+
+/* VM Userspace Memory Region Add
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * backing_src - Storage source for this region.
+ * NULL to use anonymous memory.
+ * guest_paddr - Starting guest physical address
+ * slot - KVM region slot
+ * npages - Number of physical pages
+ * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Allocates a memory area of the number of pages specified by npages
+ * and maps it to the VM specified by vm, at a starting physical address
+ * given by guest_paddr. The region is created with a KVM region slot
+ * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The
+ * region is created with the flags given by flags.
+ */
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags)
+{
+ int ret;
+ unsigned long pmem_size = 0;
+ struct userspace_mem_region *region;
+ size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
+
+ TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
+ "address not on a page boundary.\n"
+ " guest_paddr: 0x%lx vm->page_size: 0x%x",
+ guest_paddr, vm->page_size);
+ TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
+ <= vm->max_gfn, "Physical range beyond maximum "
+ "supported physical address,\n"
+ " guest_paddr: 0x%lx npages: 0x%lx\n"
+ " vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ guest_paddr, npages, vm->max_gfn, vm->page_size);
+
+ /* Confirm a mem region with an overlapping address doesn't
+ * already exist.
+ */
+ region = (struct userspace_mem_region *) userspace_mem_region_find(
+ vm, guest_paddr, guest_paddr + npages * vm->page_size);
+ if (region != NULL)
+ TEST_ASSERT(false, "overlapping userspace_mem_region already "
+ "exists\n"
+ " requested guest_paddr: 0x%lx npages: 0x%lx "
+ "page_size: 0x%x\n"
+ " existing guest_paddr: 0x%lx size: 0x%lx",
+ guest_paddr, npages, vm->page_size,
+ (uint64_t) region->region.guest_phys_addr,
+ (uint64_t) region->region.memory_size);
+
+ /* Confirm no region with the requested slot already exists. */
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if (region->region.slot == slot)
+ break;
+ if ((guest_paddr <= (region->region.guest_phys_addr
+ + region->region.memory_size))
+ && ((guest_paddr + npages * vm->page_size)
+ >= region->region.guest_phys_addr))
+ break;
+ }
+ if (region != NULL)
+ TEST_ASSERT(false, "A mem region with the requested slot "
+ "or overlapping physical memory range already exists.\n"
+ " requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
+ " existing slot: %u paddr: 0x%lx size: 0x%lx",
+ slot, guest_paddr, npages,
+ region->region.slot,
+ (uint64_t) region->region.guest_phys_addr,
+ (uint64_t) region->region.memory_size);
+
+ /* Allocate and initialize new mem region structure. */
+ region = calloc(1, sizeof(*region));
+ TEST_ASSERT(region != NULL, "Insufficient Memory");
+ region->mmap_size = npages * vm->page_size;
+
+ /* Enough memory to align up to a huge page. */
+ if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
+ region->mmap_size += huge_page_size;
+ region->mmap_start = mmap(NULL, region->mmap_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS
+ | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
+ -1, 0);
+ TEST_ASSERT(region->mmap_start != MAP_FAILED,
+ "test_malloc failed, mmap_start: %p errno: %i",
+ region->mmap_start, errno);
+
+ /* Align THP allocation up to start of a huge page. */
+ region->host_mem = align(region->mmap_start,
+ src_type == VM_MEM_SRC_ANONYMOUS_THP ? huge_page_size : 1);
+
+ /* As needed perform madvise */
+ if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
+ ret = madvise(region->host_mem, npages * vm->page_size,
+ src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
+ TEST_ASSERT(ret == 0, "madvise failed,\n"
+ " addr: %p\n"
+ " length: 0x%lx\n"
+ " src_type: %x",
+ region->host_mem, npages * vm->page_size, src_type);
+ }
+
+ region->unused_phy_pages = sparsebit_alloc();
+ sparsebit_set_num(region->unused_phy_pages,
+ guest_paddr >> vm->page_shift, npages);
+ region->region.slot = slot;
+ region->region.flags = flags;
+ region->region.guest_phys_addr = guest_paddr;
+ region->region.memory_size = npages * vm->page_size;
+ region->region.userspace_addr = (uintptr_t) region->host_mem;
+ ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ " rc: %i errno: %i\n"
+ " slot: %u flags: 0x%x\n"
+ " guest_phys_addr: 0x%lx size: 0x%lx",
+ ret, errno, slot, flags,
+ guest_paddr, (uint64_t) region->region.memory_size);
+
+ /* Add to linked-list of memory regions. */
+ if (vm->userspace_mem_region_head)
+ vm->userspace_mem_region_head->prev = region;
+ region->next = vm->userspace_mem_region_head;
+ vm->userspace_mem_region_head = region;
+}
+
+/* Memslot to region
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * memslot - KVM memory slot ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to memory region structure that describe memory region
+ * using kvm memory slot ID given by memslot. TEST_ASSERT failure
+ * on error (e.g. currently no memory region using memslot as a KVM
+ * memory slot ID).
+ */
+static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
+ uint32_t memslot)
+{
+ struct userspace_mem_region *region;
+
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if (region->region.slot == memslot)
+ break;
+ }
+ if (region == NULL) {
+ fprintf(stderr, "No mem region with the requested slot found,\n"
+ " requested slot: %u\n", memslot);
+ fputs("---- vm dump ----\n", stderr);
+ vm_dump(stderr, vm, 2);
+ TEST_ASSERT(false, "Mem region not found");
+ }
+
+ return region;
+}
+
+/* VM Memory Region Flags Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * flags - Starting guest physical address
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the flags of the memory region specified by the value of slot,
+ * to the values given by flags.
+ */
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
+{
+ int ret;
+ struct userspace_mem_region *region;
+
+ /* Locate memory region. */
+ region = memslot2region(vm, slot);
+
+ region->region.flags = flags;
+
+ ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ " rc: %i errno: %i slot: %u flags: 0x%x",
+ ret, errno, slot, flags);
+}
+
+/* VCPU mmap Size
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Size of VCPU state
+ *
+ * Returns the size of the structure pointed to by the return value
+ * of vcpu_state().
+ */
+static int vcpu_mmap_sz(void)
+{
+ int dev_fd, ret;
+
+ dev_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (dev_fd < 0)
+ exit(KSFT_SKIP);
+
+ ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
+ TEST_ASSERT(ret >= sizeof(struct kvm_run),
+ "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
+ __func__, ret, errno);
+
+ close(dev_fd);
+
+ return ret;
+}
+
+/* VM VCPU Add
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Creates and adds to the VM specified by vm and virtual CPU with
+ * the ID given by vcpuid.
+ */
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu;
+
+ /* Confirm a vcpu with the specified id doesn't already exist. */
+ vcpu = vcpu_find(vm, vcpuid);
+ if (vcpu != NULL)
+ TEST_ASSERT(false, "vcpu with the specified id "
+ "already exists,\n"
+ " requested vcpuid: %u\n"
+ " existing vcpuid: %u state: %p",
+ vcpuid, vcpu->id, vcpu->state);
+
+ /* Allocate and initialize new vcpu structure. */
+ vcpu = calloc(1, sizeof(*vcpu));
+ TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
+ vcpu->id = vcpuid;
+ vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
+ TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
+ vcpu->fd, errno);
+
+ TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
+ "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
+ vcpu_mmap_sz(), sizeof(*vcpu->state));
+ vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+ PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
+ TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
+ "vcpu id: %u errno: %i", vcpuid, errno);
+
+ /* Add to linked-list of VCPUs. */
+ if (vm->vcpu_head)
+ vm->vcpu_head->prev = vcpu;
+ vcpu->next = vm->vcpu_head;
+ vm->vcpu_head = vcpu;
+
+ vcpu_setup(vm, vcpuid);
+}
+
+/* VM Virtual Address Unused Gap
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * sz - Size (bytes)
+ * vaddr_min - Minimum Virtual Address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Lowest virtual address at or below vaddr_min, with at least
+ * sz unused bytes. TEST_ASSERT failure if no area of at least
+ * size sz is available.
+ *
+ * Within the VM specified by vm, locates the lowest starting virtual
+ * address >= vaddr_min, that has at least sz unallocated bytes. A
+ * TEST_ASSERT failure occurs for invalid input or no area of at least
+ * sz unallocated bytes >= vaddr_min is available.
+ */
+static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min)
+{
+ uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
+
+ /* Determine lowest permitted virtual page index. */
+ uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
+ if ((pgidx_start * vm->page_size) < vaddr_min)
+ goto no_va_found;
+
+ /* Loop over section with enough valid virtual page indexes. */
+ if (!sparsebit_is_set_num(vm->vpages_valid,
+ pgidx_start, pages))
+ pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
+ pgidx_start, pages);
+ do {
+ /*
+ * Are there enough unused virtual pages available at
+ * the currently proposed starting virtual page index.
+ * If not, adjust proposed starting index to next
+ * possible.
+ */
+ if (sparsebit_is_clear_num(vm->vpages_mapped,
+ pgidx_start, pages))
+ goto va_found;
+ pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
+ pgidx_start, pages);
+ if (pgidx_start == 0)
+ goto no_va_found;
+
+ /*
+ * If needed, adjust proposed starting virtual address,
+ * to next range of valid virtual addresses.
+ */
+ if (!sparsebit_is_set_num(vm->vpages_valid,
+ pgidx_start, pages)) {
+ pgidx_start = sparsebit_next_set_num(
+ vm->vpages_valid, pgidx_start, pages);
+ if (pgidx_start == 0)
+ goto no_va_found;
+ }
+ } while (pgidx_start != 0);
+
+no_va_found:
+ TEST_ASSERT(false, "No vaddr of specified pages available, "
+ "pages: 0x%lx", pages);
+
+ /* NOT REACHED */
+ return -1;
+
+va_found:
+ TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
+ pgidx_start, pages),
+ "Unexpected, invalid virtual page index range,\n"
+ " pgidx_start: 0x%lx\n"
+ " pages: 0x%lx",
+ pgidx_start, pages);
+ TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
+ pgidx_start, pages),
+ "Unexpected, pages already mapped,\n"
+ " pgidx_start: 0x%lx\n"
+ " pages: 0x%lx",
+ pgidx_start, pages);
+
+ return pgidx_start * vm->page_size;
+}
+
+/* VM Virtual Address Allocate
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * sz - Size in bytes
+ * vaddr_min - Minimum starting virtual address
+ * data_memslot - Memory region slot for data pages
+ * pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least sz bytes within the virtual address space of the vm
+ * given by vm. The allocated bytes are mapped to a virtual address >=
+ * the address given by vaddr_min. Note that each allocation uses a
+ * a unique set of pages, with the minimum real allocation being at least
+ * a page.
+ */
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+ uint32_t data_memslot, uint32_t pgd_memslot)
+{
+ uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
+
+ virt_pgd_alloc(vm, pgd_memslot);
+
+ /* Find an unused range of virtual page addresses of at least
+ * pages in length.
+ */
+ vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
+
+ /* Map the virtual pages. */
+ for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
+ pages--, vaddr += vm->page_size) {
+ vm_paddr_t paddr;
+
+ paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot);
+
+ virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+
+ sparsebit_set(vm->vpages_mapped,
+ vaddr >> vm->page_shift);
+ }
+
+ return vaddr_start;
+}
+
+/* Address VM Physical to Host Virtual
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gpa - VM physical address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent host virtual address
+ *
+ * Locates the memory region containing the VM physical address given
+ * by gpa, within the VM given by vm. When found, the host virtual
+ * address providing the memory to the vm physical address is returned.
+ * A TEST_ASSERT failure occurs if no region containing gpa exists.
+ */
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+ struct userspace_mem_region *region;
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if ((gpa >= region->region.guest_phys_addr)
+ && (gpa <= (region->region.guest_phys_addr
+ + region->region.memory_size - 1)))
+ return (void *) ((uintptr_t) region->host_mem
+ + (gpa - region->region.guest_phys_addr));
+ }
+
+ TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa);
+ return NULL;
+}
+
+/* Address Host Virtual to VM Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * hva - Host virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Locates the memory region containing the host virtual address given
+ * by hva, within the VM given by vm. When found, the equivalent
+ * VM physical address is returned. A TEST_ASSERT failure occurs if no
+ * region containing hva exists.
+ */
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
+{
+ struct userspace_mem_region *region;
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if ((hva >= region->host_mem)
+ && (hva <= (region->host_mem
+ + region->region.memory_size - 1)))
+ return (vm_paddr_t) ((uintptr_t)
+ region->region.guest_phys_addr
+ + (hva - (uintptr_t) region->host_mem));
+ }
+
+ TEST_ASSERT(false, "No mapping to a guest physical address, "
+ "hva: %p", hva);
+ return -1;
+}
+
+/* VM Create IRQ Chip
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Creates an interrupt controller chip for the VM specified by vm.
+ */
+void vm_create_irqchip(struct kvm_vm *vm)
+{
+ int ret;
+
+ ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
+ TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+/* VM VCPU State
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to structure that describes the state of the VCPU.
+ *
+ * Locates and returns a pointer to a structure that describes the
+ * state of the VCPU with the given vcpuid.
+ */
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ return vcpu->state;
+}
+
+/* VM VCPU Run
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Switch to executing the code for the VCPU given by vcpuid, within the VM
+ * given by vm.
+ */
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ int ret = _vcpu_run(vm, vcpuid);
+ TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int rc;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ do {
+ rc = ioctl(vcpu->fd, KVM_RUN, NULL);
+ } while (rc == -1 && errno == EINTR);
+ return rc;
+}
+
+/* VM VCPU Set MP State
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * mp_state - mp_state to be set
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the MP state of the VCPU given by vcpuid, to the state given
+ * by mp_state.
+ */
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_mp_state *mp_state)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
+ TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+/* VM VCPU Regs Get
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args:
+ * regs - current state of VCPU regs
+ *
+ * Return: None
+ *
+ * Obtains the current register state for the VCPU specified by vcpuid
+ * and stores it at the location given by regs.
+ */
+void vcpu_regs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Get the regs. */
+ ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
+ TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+/* VM VCPU Regs Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * regs - Values to set VCPU regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the regs of the VCPU specified by vcpuid to the values
+ * given by regs.
+ */
+void vcpu_regs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Set the regs. */
+ ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
+ TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Get the regs. */
+ ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
+ TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Set the regs. */
+ ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
+ TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+/* VM VCPU Args Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * num - number of arguments
+ * ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first num function input arguments to the values
+ * given as variable args. Each of the variable args is expected to
+ * be of type uint64_t.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+ va_list ap;
+ struct kvm_regs regs;
+
+ TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
+ " num: %u\n",
+ num);
+
+ va_start(ap, num);
+ vcpu_regs_get(vm, vcpuid, &regs);
+
+ if (num >= 1)
+ regs.rdi = va_arg(ap, uint64_t);
+
+ if (num >= 2)
+ regs.rsi = va_arg(ap, uint64_t);
+
+ if (num >= 3)
+ regs.rdx = va_arg(ap, uint64_t);
+
+ if (num >= 4)
+ regs.rcx = va_arg(ap, uint64_t);
+
+ if (num >= 5)
+ regs.r8 = va_arg(ap, uint64_t);
+
+ if (num >= 6)
+ regs.r9 = va_arg(ap, uint64_t);
+
+ vcpu_regs_set(vm, vcpuid, &regs);
+ va_end(ap);
+}
+
+/* VM VCPU System Regs Get
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args:
+ * sregs - current state of VCPU system regs
+ *
+ * Return: None
+ *
+ * Obtains the current system register state for the VCPU specified by
+ * vcpuid and stores it at the location given by sregs.
+ */
+void vcpu_sregs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Get the regs. */
+ /* Get the regs. */
+ ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
+ TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+/* VM VCPU System Regs Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * sregs - Values to set VCPU system regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the system regs of the VCPU specified by vcpuid to the values
+ * given by sregs.
+ */
+void vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+ int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
+ TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+int _vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Get the regs. */
+ return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
+}
+
+/* VCPU Ioctl
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * cmd - Ioctl number
+ * arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VCPU fd.
+ */
+void vcpu_ioctl(struct kvm_vm *vm,
+ uint32_t vcpuid, unsigned long cmd, void *arg)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ ret = ioctl(vcpu->fd, cmd, arg);
+ TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
+ cmd, ret, errno, strerror(errno));
+}
+
+/* VM Ioctl
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * cmd - Ioctl number
+ * arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VM fd.
+ */
+void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+ int ret;
+
+ ret = ioctl(vm->fd, cmd, arg);
+ TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
+ cmd, ret, errno, strerror(errno));
+}
+
+/* VM Dump
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * indent - Left margin indent amount
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VM given by vm, to the FILE stream
+ * given by stream.
+ */
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ struct userspace_mem_region *region;
+ struct vcpu *vcpu;
+
+ fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
+ fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
+ fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
+ fprintf(stream, "%*sMem Regions:\n", indent, "");
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
+ "host_virt: %p\n", indent + 2, "",
+ (uint64_t) region->region.guest_phys_addr,
+ (uint64_t) region->region.memory_size,
+ region->host_mem);
+ fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
+ sparsebit_dump(stream, region->unused_phy_pages, 0);
+ }
+ fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
+ sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
+ fprintf(stream, "%*spgd_created: %u\n", indent, "",
+ vm->pgd_created);
+ if (vm->pgd_created) {
+ fprintf(stream, "%*sVirtual Translation Tables:\n",
+ indent + 2, "");
+ virt_dump(stream, vm, indent + 4);
+ }
+ fprintf(stream, "%*sVCPUs:\n", indent, "");
+ for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next)
+ vcpu_dump(stream, vm, vcpu->id, indent + 2);
+}
+
+/* VM VCPU Dump
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * indent - Left margin indent amount
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by vcpuid, within the VM
+ * given by vm, to the FILE stream given by stream.
+ */
+void vcpu_dump(FILE *stream, struct kvm_vm *vm,
+ uint32_t vcpuid, uint8_t indent)
+{
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+
+ fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
+
+ fprintf(stream, "%*sregs:\n", indent + 2, "");
+ vcpu_regs_get(vm, vcpuid, &regs);
+ regs_dump(stream, &regs, indent + 4);
+
+ fprintf(stream, "%*ssregs:\n", indent + 2, "");
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+ sregs_dump(stream, &sregs, indent + 4);
+}
+
+/* Known KVM exit reasons */
+static struct exit_reason {
+ unsigned int reason;
+ const char *name;
+} exit_reasons_known[] = {
+ {KVM_EXIT_UNKNOWN, "UNKNOWN"},
+ {KVM_EXIT_EXCEPTION, "EXCEPTION"},
+ {KVM_EXIT_IO, "IO"},
+ {KVM_EXIT_HYPERCALL, "HYPERCALL"},
+ {KVM_EXIT_DEBUG, "DEBUG"},
+ {KVM_EXIT_HLT, "HLT"},
+ {KVM_EXIT_MMIO, "MMIO"},
+ {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
+ {KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
+ {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
+ {KVM_EXIT_INTR, "INTR"},
+ {KVM_EXIT_SET_TPR, "SET_TPR"},
+ {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
+ {KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
+ {KVM_EXIT_S390_RESET, "S390_RESET"},
+ {KVM_EXIT_DCR, "DCR"},
+ {KVM_EXIT_NMI, "NMI"},
+ {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
+ {KVM_EXIT_OSI, "OSI"},
+ {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
+#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
+ {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
+#endif
+};
+
+/* Exit Reason String
+ *
+ * Input Args:
+ * exit_reason - Exit reason
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Constant string pointer describing the exit reason.
+ *
+ * Locates and returns a constant string that describes the KVM exit
+ * reason given by exit_reason. If no such string is found, a constant
+ * string of "Unknown" is returned.
+ */
+const char *exit_reason_str(unsigned int exit_reason)
+{
+ unsigned int n1;
+
+ for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
+ if (exit_reason == exit_reasons_known[n1].reason)
+ return exit_reasons_known[n1].name;
+ }
+
+ return "Unknown";
+}
+
+/* Physical Page Allocate
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * paddr_min - Physical address minimum
+ * memslot - Memory region to allocate page from
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting physical address
+ *
+ * Within the VM specified by vm, locates an available physical page
+ * at or above paddr_min. If found, the page is marked as in use
+ * and its address is returned. A TEST_ASSERT failure occurs if no
+ * page is available at or above paddr_min.
+ */
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
+ vm_paddr_t paddr_min, uint32_t memslot)
+{
+ struct userspace_mem_region *region;
+ sparsebit_idx_t pg;
+
+ TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
+ "not divisible by page size.\n"
+ " paddr_min: 0x%lx page_size: 0x%x",
+ paddr_min, vm->page_size);
+
+ /* Locate memory region. */
+ region = memslot2region(vm, memslot);
+
+ /* Locate next available physical page at or above paddr_min. */
+ pg = paddr_min >> vm->page_shift;
+
+ if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
+ pg = sparsebit_next_set(region->unused_phy_pages, pg);
+ if (pg == 0) {
+ fprintf(stderr, "No guest physical page available, "
+ "paddr_min: 0x%lx page_size: 0x%x memslot: %u",
+ paddr_min, vm->page_size, memslot);
+ fputs("---- vm dump ----\n", stderr);
+ vm_dump(stderr, vm, 2);
+ abort();
+ }
+ }
+
+ /* Specify page as in use and return its address. */
+ sparsebit_clear(region->unused_phy_pages, pg);
+
+ return pg * vm->page_size;
+}
+
+/* Address Guest Virtual to Host Virtual
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent host virtual address
+ */
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
new file mode 100644
index 000000000000..a0bd1980c81c
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -0,0 +1,67 @@
+/*
+ * tools/testing/selftests/kvm/lib/kvm_util.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#ifndef KVM_UTIL_INTERNAL_H
+#define KVM_UTIL_INTERNAL_H 1
+
+#include "sparsebit.h"
+
+#ifndef BITS_PER_BYTE
+#define BITS_PER_BYTE 8
+#endif
+
+#ifndef BITS_PER_LONG
+#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
+#endif
+
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)
+
+/* Concrete definition of struct kvm_vm. */
+struct userspace_mem_region {
+ struct userspace_mem_region *next, *prev;
+ struct kvm_userspace_memory_region region;
+ struct sparsebit *unused_phy_pages;
+ int fd;
+ off_t offset;
+ void *host_mem;
+ void *mmap_start;
+ size_t mmap_size;
+};
+
+struct vcpu {
+ struct vcpu *next, *prev;
+ uint32_t id;
+ int fd;
+ struct kvm_run *state;
+};
+
+struct kvm_vm {
+ int mode;
+ int fd;
+ unsigned int page_size;
+ unsigned int page_shift;
+ uint64_t max_gfn;
+ struct vcpu *vcpu_head;
+ struct userspace_mem_region *userspace_mem_region_head;
+ struct sparsebit *vpages_valid;
+ struct sparsebit *vpages_mapped;
+ bool pgd_created;
+ vm_paddr_t pgd;
+};
+
+struct vcpu *vcpu_find(struct kvm_vm *vm,
+ uint32_t vcpuid);
+void vcpu_setup(struct kvm_vm *vm, int vcpuid);
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+void regs_dump(FILE *stream, struct kvm_regs *regs,
+ uint8_t indent);
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
+ uint8_t indent);
+
+#endif
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
new file mode 100644
index 000000000000..b132bc95d183
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -0,0 +1,2087 @@
+/*
+ * Sparse bit array
+ *
+ * Copyright (C) 2018, Google LLC.
+ * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * This library provides functions to support a memory efficient bit array,
+ * with an index size of 2^64. A sparsebit array is allocated through
+ * the use sparsebit_alloc() and free'd via sparsebit_free(),
+ * such as in the following:
+ *
+ * struct sparsebit *s;
+ * s = sparsebit_alloc();
+ * sparsebit_free(&s);
+ *
+ * The struct sparsebit type resolves down to a struct sparsebit.
+ * Note that, sparsebit_free() takes a pointer to the sparsebit
+ * structure. This is so that sparsebit_free() is able to poison
+ * the pointer (e.g. set it to NULL) to the struct sparsebit before
+ * returning to the caller.
+ *
+ * Between the return of sparsebit_alloc() and the call of
+ * sparsebit_free(), there are multiple query and modifying operations
+ * that can be performed on the allocated sparsebit array. All of
+ * these operations take as a parameter the value returned from
+ * sparsebit_alloc() and most also take a bit index. Frequently
+ * used routines include:
+ *
+ * ---- Query Operations
+ * sparsebit_is_set(s, idx)
+ * sparsebit_is_clear(s, idx)
+ * sparsebit_any_set(s)
+ * sparsebit_first_set(s)
+ * sparsebit_next_set(s, prev_idx)
+ *
+ * ---- Modifying Operations
+ * sparsebit_set(s, idx)
+ * sparsebit_clear(s, idx)
+ * sparsebit_set_num(s, idx, num);
+ * sparsebit_clear_num(s, idx, num);
+ *
+ * A common operation, is to itterate over all the bits set in a test
+ * sparsebit array. This can be done via code with the following structure:
+ *
+ * sparsebit_idx_t idx;
+ * if (sparsebit_any_set(s)) {
+ * idx = sparsebit_first_set(s);
+ * do {
+ * ...
+ * idx = sparsebit_next_set(s, idx);
+ * } while (idx != 0);
+ * }
+ *
+ * The index of the first bit set needs to be obtained via
+ * sparsebit_first_set(), because sparsebit_next_set(), needs
+ * the index of the previously set. The sparsebit_idx_t type is
+ * unsigned, so there is no previous index before 0 that is available.
+ * Also, the call to sparsebit_first_set() is not made unless there
+ * is at least 1 bit in the array set. This is because sparsebit_first_set()
+ * aborts if sparsebit_first_set() is called with no bits set.
+ * It is the callers responsibility to assure that the
+ * sparsebit array has at least a single bit set before calling
+ * sparsebit_first_set().
+ *
+ * ==== Implementation Overview ====
+ * For the most part the internal implementation of sparsebit is
+ * opaque to the caller. One important implementation detail that the
+ * caller may need to be aware of is the spatial complexity of the
+ * implementation. This implementation of a sparsebit array is not
+ * only sparse, in that it uses memory proportional to the number of bits
+ * set. It is also efficient in memory usage when most of the bits are
+ * set.
+ *
+ * At a high-level the state of the bit settings are maintained through
+ * the use of a binary-search tree, where each node contains at least
+ * the following members:
+ *
+ * typedef uint64_t sparsebit_idx_t;
+ * typedef uint64_t sparsebit_num_t;
+ *
+ * sparsebit_idx_t idx;
+ * uint32_t mask;
+ * sparsebit_num_t num_after;
+ *
+ * The idx member contains the bit index of the first bit described by this
+ * node, while the mask member stores the setting of the first 32-bits.
+ * The setting of the bit at idx + n, where 0 <= n < 32, is located in the
+ * mask member at 1 << n.
+ *
+ * Nodes are sorted by idx and the bits described by two nodes will never
+ * overlap. The idx member is always aligned to the mask size, i.e. a
+ * multiple of 32.
+ *
+ * Beyond a typical implementation, the nodes in this implementation also
+ * contains a member named num_after. The num_after member holds the
+ * number of bits immediately after the mask bits that are contiguously set.
+ * The use of the num_after member allows this implementation to efficiently
+ * represent cases where most bits are set. For example, the case of all
+ * but the last two bits set, is represented by the following two nodes:
+ *
+ * node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0
+ * node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0
+ *
+ * ==== Invariants ====
+ * This implementation usses the following invariants:
+ *
+ * + Node are only used to represent bits that are set.
+ * Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ * + Sum of bits set in all the nodes is equal to the value of
+ * the struct sparsebit_pvt num_set member.
+ *
+ * + The setting of at least one bit is always described in a nodes
+ * mask (mask >= 1).
+ *
+ * + A node with all mask bits set only occurs when the last bit
+ * described by the previous node is not equal to this nodes
+ * starting index - 1. All such occurences of this condition are
+ * avoided by moving the setting of the nodes mask bits into
+ * the previous nodes num_after setting.
+ *
+ * + Node starting index is evenly divisible by the number of bits
+ * within a nodes mask member.
+ *
+ * + Nodes never represent a range of bits that wrap around the
+ * highest supported index.
+ *
+ * (idx + MASK_BITS + num_after - 1) <= ((sparsebit_idx_t) 0) - 1)
+ *
+ * As a consequence of the above, the num_after member of a node
+ * will always be <=:
+ *
+ * maximum_index - nodes_starting_index - number_of_mask_bits
+ *
+ * + Nodes within the binary search tree are sorted based on each
+ * nodes starting index.
+ *
+ * + The range of bits described by any two nodes do not overlap. The
+ * range of bits described by a single node is:
+ *
+ * start: node->idx
+ * end (inclusive): node->idx + MASK_BITS + node->num_after - 1;
+ *
+ * Note, at times these invariants are temporarily violated for a
+ * specific portion of the code. For example, when setting a mask
+ * bit, there is a small delay between when the mask bit is set and the
+ * value in the struct sparsebit_pvt num_set member is updated. Other
+ * temporary violations occur when node_split() is called with a specified
+ * index and assures that a node where its mask represents the bit
+ * at the specified index exists. At times to do this node_split()
+ * must split an existing node into two nodes or create a node that
+ * has no bits set. Such temporary violations must be corrected before
+ * returning to the caller. These corrections are typically performed
+ * by the local function node_reduce().
+ */
+
+#include "test_util.h"
+#include "sparsebit.h"
+#include <limits.h>
+#include <assert.h>
+
+#define DUMP_LINE_MAX 100 /* Does not include indent amount */
+
+typedef uint32_t mask_t;
+#define MASK_BITS (sizeof(mask_t) * CHAR_BIT)
+
+struct node {
+ struct node *parent;
+ struct node *left;
+ struct node *right;
+ sparsebit_idx_t idx; /* index of least-significant bit in mask */
+ sparsebit_num_t num_after; /* num contiguously set after mask */
+ mask_t mask;
+};
+
+struct sparsebit {
+ /*
+ * Points to root node of the binary search
+ * tree. Equal to NULL when no bits are set in
+ * the entire sparsebit array.
+ */
+ struct node *root;
+
+ /*
+ * A redundant count of the total number of bits set. Used for
+ * diagnostic purposes and to change the time complexity of
+ * sparsebit_num_set() from O(n) to O(1).
+ * Note: Due to overflow, a value of 0 means none or all set.
+ */
+ sparsebit_num_t num_set;
+};
+
+/* Returns the number of set bits described by the settings
+ * of the node pointed to by nodep.
+ */
+static sparsebit_num_t node_num_set(struct node *nodep)
+{
+ return nodep->num_after + __builtin_popcount(nodep->mask);
+}
+
+/* Returns a pointer to the node that describes the
+ * lowest bit index.
+ */
+static struct node *node_first(struct sparsebit *s)
+{
+ struct node *nodep;
+
+ for (nodep = s->root; nodep && nodep->left; nodep = nodep->left)
+ ;
+
+ return nodep;
+}
+
+/* Returns a pointer to the node that describes the
+ * lowest bit index > the index of the node pointed to by np.
+ * Returns NULL if no node with a higher index exists.
+ */
+static struct node *node_next(struct sparsebit *s, struct node *np)
+{
+ struct node *nodep = np;
+
+ /*
+ * If current node has a right child, next node is the left-most
+ * of the right child.
+ */
+ if (nodep->right) {
+ for (nodep = nodep->right; nodep->left; nodep = nodep->left)
+ ;
+ return nodep;
+ }
+
+ /*
+ * No right child. Go up until node is left child of a parent.
+ * That parent is then the next node.
+ */
+ while (nodep->parent && nodep == nodep->parent->right)
+ nodep = nodep->parent;
+
+ return nodep->parent;
+}
+
+/* Searches for and returns a pointer to the node that describes the
+ * highest index < the index of the node pointed to by np.
+ * Returns NULL if no node with a lower index exists.
+ */
+static struct node *node_prev(struct sparsebit *s, struct node *np)
+{
+ struct node *nodep = np;
+
+ /*
+ * If current node has a left child, next node is the right-most
+ * of the left child.
+ */
+ if (nodep->left) {
+ for (nodep = nodep->left; nodep->right; nodep = nodep->right)
+ ;
+ return (struct node *) nodep;
+ }
+
+ /*
+ * No left child. Go up until node is right child of a parent.
+ * That parent is then the next node.
+ */
+ while (nodep->parent && nodep == nodep->parent->left)
+ nodep = nodep->parent;
+
+ return (struct node *) nodep->parent;
+}
+
+
+/* Allocates space to hold a copy of the node sub-tree pointed to by
+ * subtree and duplicates the bit settings to the newly allocated nodes.
+ * Returns the newly allocated copy of subtree.
+ */
+static struct node *node_copy_subtree(struct node *subtree)
+{
+ struct node *root;
+
+ /* Duplicate the node at the root of the subtree */
+ root = calloc(1, sizeof(*root));
+ if (!root) {
+ perror("calloc");
+ abort();
+ }
+
+ root->idx = subtree->idx;
+ root->mask = subtree->mask;
+ root->num_after = subtree->num_after;
+
+ /* As needed, recursively duplicate the left and right subtrees */
+ if (subtree->left) {
+ root->left = node_copy_subtree(subtree->left);
+ root->left->parent = root;
+ }
+
+ if (subtree->right) {
+ root->right = node_copy_subtree(subtree->right);
+ root->right->parent = root;
+ }
+
+ return root;
+}
+
+/* Searches for and returns a pointer to the node that describes the setting
+ * of the bit given by idx. A node describes the setting of a bit if its
+ * index is within the bits described by the mask bits or the number of
+ * contiguous bits set after the mask. Returns NULL if there is no such node.
+ */
+static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Find the node that describes the setting of the bit at idx */
+ for (nodep = s->root; nodep;
+ nodep = nodep->idx > idx ? nodep->left : nodep->right) {
+ if (idx >= nodep->idx &&
+ idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
+ break;
+ }
+
+ return nodep;
+}
+
+/* Entry Requirements:
+ * + A node that describes the setting of idx is not already present.
+ *
+ * Adds a new node to describe the setting of the bit at the index given
+ * by idx. Returns a pointer to the newly added node.
+ *
+ * TODO(lhuemill): Degenerate cases causes the tree to get unbalanced.
+ */
+static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep, *parentp, *prev;
+
+ /* Allocate and initialize the new node. */
+ nodep = calloc(1, sizeof(*nodep));
+ if (!nodep) {
+ perror("calloc");
+ abort();
+ }
+
+ nodep->idx = idx & -MASK_BITS;
+
+ /* If no nodes, set it up as the root node. */
+ if (!s->root) {
+ s->root = nodep;
+ return nodep;
+ }
+
+ /*
+ * Find the parent where the new node should be attached
+ * and add the node there.
+ */
+ parentp = s->root;
+ while (true) {
+ if (idx < parentp->idx) {
+ if (!parentp->left) {
+ parentp->left = nodep;
+ nodep->parent = parentp;
+ break;
+ }
+ parentp = parentp->left;
+ } else {
+ assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1);
+ if (!parentp->right) {
+ parentp->right = nodep;
+ nodep->parent = parentp;
+ break;
+ }
+ parentp = parentp->right;
+ }
+ }
+
+ /*
+ * Does num_after bits of previous node overlap with the mask
+ * of the new node? If so set the bits in the new nodes mask
+ * and reduce the previous nodes num_after.
+ */
+ prev = node_prev(s, nodep);
+ while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) {
+ unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1)
+ - nodep->idx;
+ assert(prev->num_after > 0);
+ assert(n1 < MASK_BITS);
+ assert(!(nodep->mask & (1 << n1)));
+ nodep->mask |= (1 << n1);
+ prev->num_after--;
+ }
+
+ return nodep;
+}
+
+/* Returns whether all the bits in the sparsebit array are set. */
+bool sparsebit_all_set(struct sparsebit *s)
+{
+ /*
+ * If any nodes there must be at least one bit set. Only case
+ * where a bit is set and total num set is 0, is when all bits
+ * are set.
+ */
+ return s->root && s->num_set == 0;
+}
+
+/* Clears all bits described by the node pointed to by nodep, then
+ * removes the node.
+ */
+static void node_rm(struct sparsebit *s, struct node *nodep)
+{
+ struct node *tmp;
+ sparsebit_num_t num_set;
+
+ num_set = node_num_set(nodep);
+ assert(s->num_set >= num_set || sparsebit_all_set(s));
+ s->num_set -= node_num_set(nodep);
+
+ /* Have both left and right child */
+ if (nodep->left && nodep->right) {
+ /*
+ * Move left children to the leftmost leaf node
+ * of the right child.
+ */
+ for (tmp = nodep->right; tmp->left; tmp = tmp->left)
+ ;
+ tmp->left = nodep->left;
+ nodep->left = NULL;
+ tmp->left->parent = tmp;
+ }
+
+ /* Left only child */
+ if (nodep->left) {
+ if (!nodep->parent) {
+ s->root = nodep->left;
+ nodep->left->parent = NULL;
+ } else {
+ nodep->left->parent = nodep->parent;
+ if (nodep == nodep->parent->left)
+ nodep->parent->left = nodep->left;
+ else {
+ assert(nodep == nodep->parent->right);
+ nodep->parent->right = nodep->left;
+ }
+ }
+
+ nodep->parent = nodep->left = nodep->right = NULL;
+ free(nodep);
+
+ return;
+ }
+
+
+ /* Right only child */
+ if (nodep->right) {
+ if (!nodep->parent) {
+ s->root = nodep->right;
+ nodep->right->parent = NULL;
+ } else {
+ nodep->right->parent = nodep->parent;
+ if (nodep == nodep->parent->left)
+ nodep->parent->left = nodep->right;
+ else {
+ assert(nodep == nodep->parent->right);
+ nodep->parent->right = nodep->right;
+ }
+ }
+
+ nodep->parent = nodep->left = nodep->right = NULL;
+ free(nodep);
+
+ return;
+ }
+
+ /* Leaf Node */
+ if (!nodep->parent) {
+ s->root = NULL;
+ } else {
+ if (nodep->parent->left == nodep)
+ nodep->parent->left = NULL;
+ else {
+ assert(nodep == nodep->parent->right);
+ nodep->parent->right = NULL;
+ }
+ }
+
+ nodep->parent = nodep->left = nodep->right = NULL;
+ free(nodep);
+
+ return;
+}
+
+/* Splits the node containing the bit at idx so that there is a node
+ * that starts at the specified index. If no such node exists, a new
+ * node at the specified index is created. Returns the new node.
+ *
+ * idx must start of a mask boundary.
+ */
+static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep1, *nodep2;
+ sparsebit_idx_t offset;
+ sparsebit_num_t orig_num_after;
+
+ assert(!(idx % MASK_BITS));
+
+ /*
+ * Is there a node that describes the setting of idx?
+ * If not, add it.
+ */
+ nodep1 = node_find(s, idx);
+ if (!nodep1)
+ return node_add(s, idx);
+
+ /*
+ * All done if the starting index of the node is where the
+ * split should occur.
+ */
+ if (nodep1->idx == idx)
+ return nodep1;
+
+ /*
+ * Split point not at start of mask, so it must be part of
+ * bits described by num_after.
+ */
+
+ /*
+ * Calculate offset within num_after for where the split is
+ * to occur.
+ */
+ offset = idx - (nodep1->idx + MASK_BITS);
+ orig_num_after = nodep1->num_after;
+
+ /*
+ * Add a new node to describe the bits starting at
+ * the split point.
+ */
+ nodep1->num_after = offset;
+ nodep2 = node_add(s, idx);
+
+ /* Move bits after the split point into the new node */
+ nodep2->num_after = orig_num_after - offset;
+ if (nodep2->num_after >= MASK_BITS) {
+ nodep2->mask = ~(mask_t) 0;
+ nodep2->num_after -= MASK_BITS;
+ } else {
+ nodep2->mask = (1 << nodep2->num_after) - 1;
+ nodep2->num_after = 0;
+ }
+
+ return nodep2;
+}
+
+/* Iteratively reduces the node pointed to by nodep and its adjacent
+ * nodes into a more compact form. For example, a node with a mask with
+ * all bits set adjacent to a previous node, will get combined into a
+ * single node with an increased num_after setting.
+ *
+ * After each reduction, a further check is made to see if additional
+ * reductions are possible with the new previous and next nodes. Note,
+ * a search for a reduction is only done across the nodes nearest nodep
+ * and those that became part of a reduction. Reductions beyond nodep
+ * and the adjacent nodes that are reduced are not discovered. It is the
+ * responsibility of the caller to pass a nodep that is within one node
+ * of each possible reduction.
+ *
+ * This function does not fix the temporary violation of all invariants.
+ * For example it does not fix the case where the bit settings described
+ * by two or more nodes overlap. Such a violation introduces the potential
+ * complication of a bit setting for a specific index having different settings
+ * in different nodes. This would then introduce the further complication
+ * of which node has the correct setting of the bit and thus such conditions
+ * are not allowed.
+ *
+ * This function is designed to fix invariant violations that are introduced
+ * by node_split() and by changes to the nodes mask or num_after members.
+ * For example, when setting a bit within a nodes mask, the function that
+ * sets the bit doesn't have to worry about whether the setting of that
+ * bit caused the mask to have leading only or trailing only bits set.
+ * Instead, the function can call node_reduce(), with nodep equal to the
+ * node address that it set a mask bit in, and node_reduce() will notice
+ * the cases of leading or trailing only bits and that there is an
+ * adjacent node that the bit settings could be merged into.
+ *
+ * This implementation specifically detects and corrects violation of the
+ * following invariants:
+ *
+ * + Node are only used to represent bits that are set.
+ * Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ * + The setting of at least one bit is always described in a nodes
+ * mask (mask >= 1).
+ *
+ * + A node with all mask bits set only occurs when the last bit
+ * described by the previous node is not equal to this nodes
+ * starting index - 1. All such occurences of this condition are
+ * avoided by moving the setting of the nodes mask bits into
+ * the previous nodes num_after setting.
+ */
+static void node_reduce(struct sparsebit *s, struct node *nodep)
+{
+ bool reduction_performed;
+
+ do {
+ reduction_performed = false;
+ struct node *prev, *next, *tmp;
+
+ /* 1) Potential reductions within the current node. */
+
+ /* Nodes with all bits cleared may be removed. */
+ if (nodep->mask == 0 && nodep->num_after == 0) {
+ /*
+ * About to remove the node pointed to by
+ * nodep, which normally would cause a problem
+ * for the next pass through the reduction loop,
+ * because the node at the starting point no longer
+ * exists. This potential problem is handled
+ * by first remembering the location of the next
+ * or previous nodes. Doesn't matter which, because
+ * once the node at nodep is removed, there will be
+ * no other nodes between prev and next.
+ *
+ * Note, the checks performed on nodep against both
+ * both prev and next both check for an adjacent
+ * node that can be reduced into a single node. As
+ * such, after removing the node at nodep, doesn't
+ * matter whether the nodep for the next pass
+ * through the loop is equal to the previous pass
+ * prev or next node. Either way, on the next pass
+ * the one not selected will become either the
+ * prev or next node.
+ */
+ tmp = node_next(s, nodep);
+ if (!tmp)
+ tmp = node_prev(s, nodep);
+
+ node_rm(s, nodep);
+ nodep = NULL;
+
+ nodep = tmp;
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * When the mask is 0, can reduce the amount of num_after
+ * bits by moving the initial num_after bits into the mask.
+ */
+ if (nodep->mask == 0) {
+ assert(nodep->num_after != 0);
+ assert(nodep->idx + MASK_BITS > nodep->idx);
+
+ nodep->idx += MASK_BITS;
+
+ if (nodep->num_after >= MASK_BITS) {
+ nodep->mask = ~0;
+ nodep->num_after -= MASK_BITS;
+ } else {
+ nodep->mask = (1u << nodep->num_after) - 1;
+ nodep->num_after = 0;
+ }
+
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * 2) Potential reductions between the current and
+ * previous nodes.
+ */
+ prev = node_prev(s, nodep);
+ if (prev) {
+ sparsebit_idx_t prev_highest_bit;
+
+ /* Nodes with no bits set can be removed. */
+ if (prev->mask == 0 && prev->num_after == 0) {
+ node_rm(s, prev);
+
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * All mask bits set and previous node has
+ * adjacent index.
+ */
+ if (nodep->mask + 1 == 0 &&
+ prev->idx + MASK_BITS == nodep->idx) {
+ prev->num_after += MASK_BITS + nodep->num_after;
+ nodep->mask = 0;
+ nodep->num_after = 0;
+
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * Is node adjacent to previous node and the node
+ * contains a single contiguous range of bits
+ * starting from the beginning of the mask?
+ */
+ prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after;
+ if (prev_highest_bit + 1 == nodep->idx &&
+ (nodep->mask | (nodep->mask >> 1)) == nodep->mask) {
+ /*
+ * How many contiguous bits are there?
+ * Is equal to the total number of set
+ * bits, due to an earlier check that
+ * there is a single contiguous range of
+ * set bits.
+ */
+ unsigned int num_contiguous
+ = __builtin_popcount(nodep->mask);
+ assert((num_contiguous > 0) &&
+ ((1ULL << num_contiguous) - 1) == nodep->mask);
+
+ prev->num_after += num_contiguous;
+ nodep->mask = 0;
+
+ /*
+ * For predictable performance, handle special
+ * case where all mask bits are set and there
+ * is a non-zero num_after setting. This code
+ * is functionally correct without the following
+ * conditionalized statements, but without them
+ * the value of num_after is only reduced by
+ * the number of mask bits per pass. There are
+ * cases where num_after can be close to 2^64.
+ * Without this code it could take nearly
+ * (2^64) / 32 passes to perform the full
+ * reduction.
+ */
+ if (num_contiguous == MASK_BITS) {
+ prev->num_after += nodep->num_after;
+ nodep->num_after = 0;
+ }
+
+ reduction_performed = true;
+ continue;
+ }
+ }
+
+ /*
+ * 3) Potential reductions between the current and
+ * next nodes.
+ */
+ next = node_next(s, nodep);
+ if (next) {
+ /* Nodes with no bits set can be removed. */
+ if (next->mask == 0 && next->num_after == 0) {
+ node_rm(s, next);
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * Is next node index adjacent to current node
+ * and has a mask with all bits set?
+ */
+ if (next->idx == nodep->idx + MASK_BITS + nodep->num_after &&
+ next->mask == ~(mask_t) 0) {
+ nodep->num_after += MASK_BITS;
+ next->mask = 0;
+ nodep->num_after += next->num_after;
+ next->num_after = 0;
+
+ node_rm(s, next);
+ next = NULL;
+
+ reduction_performed = true;
+ continue;
+ }
+ }
+ } while (nodep && reduction_performed);
+}
+
+/* Returns whether the bit at the index given by idx, within the
+ * sparsebit array is set or not.
+ */
+bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Find the node that describes the setting of the bit at idx */
+ for (nodep = s->root; nodep;
+ nodep = nodep->idx > idx ? nodep->left : nodep->right)
+ if (idx >= nodep->idx &&
+ idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
+ goto have_node;
+
+ return false;
+
+have_node:
+ /* Bit is set if it is any of the bits described by num_after */
+ if (nodep->num_after && idx >= nodep->idx + MASK_BITS)
+ return true;
+
+ /* Is the corresponding mask bit set */
+ assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS);
+ return !!(nodep->mask & (1 << (idx - nodep->idx)));
+}
+
+/* Within the sparsebit array pointed to by s, sets the bit
+ * at the index given by idx.
+ */
+static void bit_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Skip bits that are already set */
+ if (sparsebit_is_set(s, idx))
+ return;
+
+ /*
+ * Get a node where the bit at idx is described by the mask.
+ * The node_split will also create a node, if there isn't
+ * already a node that describes the setting of bit.
+ */
+ nodep = node_split(s, idx & -MASK_BITS);
+
+ /* Set the bit within the nodes mask */
+ assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
+ assert(!(nodep->mask & (1 << (idx - nodep->idx))));
+ nodep->mask |= 1 << (idx - nodep->idx);
+ s->num_set++;
+
+ node_reduce(s, nodep);
+}
+
+/* Within the sparsebit array pointed to by s, clears the bit
+ * at the index given by idx.
+ */
+static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Skip bits that are already cleared */
+ if (!sparsebit_is_set(s, idx))
+ return;
+
+ /* Is there a node that describes the setting of this bit? */
+ nodep = node_find(s, idx);
+ if (!nodep)
+ return;
+
+ /*
+ * If a num_after bit, split the node, so that the bit is
+ * part of a node mask.
+ */
+ if (idx >= nodep->idx + MASK_BITS)
+ nodep = node_split(s, idx & -MASK_BITS);
+
+ /*
+ * After node_split above, bit at idx should be within the mask.
+ * Clear that bit.
+ */
+ assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
+ assert(nodep->mask & (1 << (idx - nodep->idx)));
+ nodep->mask &= ~(1 << (idx - nodep->idx));
+ assert(s->num_set > 0 || sparsebit_all_set(s));
+ s->num_set--;
+
+ node_reduce(s, nodep);
+}
+
+/* Recursively dumps to the FILE stream given by stream the contents
+ * of the sub-tree of nodes pointed to by nodep. Each line of output
+ * is prefixed by the number of spaces given by indent. On each
+ * recursion, the indent amount is increased by 2. This causes nodes
+ * at each level deeper into the binary search tree to be displayed
+ * with a greater indent.
+ */
+static void dump_nodes(FILE *stream, struct node *nodep,
+ unsigned int indent)
+{
+ char *node_type;
+
+ /* Dump contents of node */
+ if (!nodep->parent)
+ node_type = "root";
+ else if (nodep == nodep->parent->left)
+ node_type = "left";
+ else {
+ assert(nodep == nodep->parent->right);
+ node_type = "right";
+ }
+ fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep);
+ fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "",
+ nodep->parent, nodep->left, nodep->right);
+ fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n",
+ indent, "", nodep->idx, nodep->mask, nodep->num_after);
+
+ /* If present, dump contents of left child nodes */
+ if (nodep->left)
+ dump_nodes(stream, nodep->left, indent + 2);
+
+ /* If present, dump contents of right child nodes */
+ if (nodep->right)
+ dump_nodes(stream, nodep->right, indent + 2);
+}
+
+static inline sparsebit_idx_t node_first_set(struct node *nodep, int start)
+{
+ mask_t leading = (mask_t)1 << start;
+ int n1 = __builtin_ctz(nodep->mask & -leading);
+
+ return nodep->idx + n1;
+}
+
+static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
+{
+ mask_t leading = (mask_t)1 << start;
+ int n1 = __builtin_ctz(~nodep->mask & -leading);
+
+ return nodep->idx + n1;
+}
+
+/* Dumps to the FILE stream specified by stream, the implementation dependent
+ * internal state of s. Each line of output is prefixed with the number
+ * of spaces given by indent. The output is completely implementation
+ * dependent and subject to change. Output from this function should only
+ * be used for diagnostic purposes. For example, this function can be
+ * used by test cases after they detect an unexpected condition, as a means
+ * to capture diagnostic information.
+ */
+static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
+ unsigned int indent)
+{
+ /* Dump the contents of s */
+ fprintf(stream, "%*sroot: %p\n", indent, "", s->root);
+ fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set);
+
+ if (s->root)
+ dump_nodes(stream, s->root, indent);
+}
+
+/* Allocates and returns a new sparsebit array. The initial state
+ * of the newly allocated sparsebit array has all bits cleared.
+ */
+struct sparsebit *sparsebit_alloc(void)
+{
+ struct sparsebit *s;
+
+ /* Allocate top level structure. */
+ s = calloc(1, sizeof(*s));
+ if (!s) {
+ perror("calloc");
+ abort();
+ }
+
+ return s;
+}
+
+/* Frees the implementation dependent data for the sparsebit array
+ * pointed to by s and poisons the pointer to that data.
+ */
+void sparsebit_free(struct sparsebit **sbitp)
+{
+ struct sparsebit *s = *sbitp;
+
+ if (!s)
+ return;
+
+ sparsebit_clear_all(s);
+ free(s);
+ *sbitp = NULL;
+}
+
+/* Makes a copy of the sparsebit array given by s, to the sparsebit
+ * array given by d. Note, d must have already been allocated via
+ * sparsebit_alloc(). It can though already have bits set, which
+ * if different from src will be cleared.
+ */
+void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
+{
+ /* First clear any bits already set in the destination */
+ sparsebit_clear_all(d);
+
+ if (s->root) {
+ d->root = node_copy_subtree(s->root);
+ d->num_set = s->num_set;
+ }
+}
+
+/* Returns whether num consecutive bits starting at idx are all set. */
+bool sparsebit_is_set_num(struct sparsebit *s,
+ sparsebit_idx_t idx, sparsebit_num_t num)
+{
+ sparsebit_idx_t next_cleared;
+
+ assert(num > 0);
+ assert(idx + num - 1 >= idx);
+
+ /* With num > 0, the first bit must be set. */
+ if (!sparsebit_is_set(s, idx))
+ return false;
+
+ /* Find the next cleared bit */
+ next_cleared = sparsebit_next_clear(s, idx);
+
+ /*
+ * If no cleared bits beyond idx, then there are at least num
+ * set bits. idx + num doesn't wrap. Otherwise check if
+ * there are enough set bits between idx and the next cleared bit.
+ */
+ return next_cleared == 0 || next_cleared - idx >= num;
+}
+
+/* Returns whether the bit at the index given by idx. */
+bool sparsebit_is_clear(struct sparsebit *s,
+ sparsebit_idx_t idx)
+{
+ return !sparsebit_is_set(s, idx);
+}
+
+/* Returns whether num consecutive bits starting at idx are all cleared. */
+bool sparsebit_is_clear_num(struct sparsebit *s,
+ sparsebit_idx_t idx, sparsebit_num_t num)
+{
+ sparsebit_idx_t next_set;
+
+ assert(num > 0);
+ assert(idx + num - 1 >= idx);
+
+ /* With num > 0, the first bit must be cleared. */
+ if (!sparsebit_is_clear(s, idx))
+ return false;
+
+ /* Find the next set bit */
+ next_set = sparsebit_next_set(s, idx);
+
+ /*
+ * If no set bits beyond idx, then there are at least num
+ * cleared bits. idx + num doesn't wrap. Otherwise check if
+ * there are enough cleared bits between idx and the next set bit.
+ */
+ return next_set == 0 || next_set - idx >= num;
+}
+
+/* Returns the total number of bits set. Note: 0 is also returned for
+ * the case of all bits set. This is because with all bits set, there
+ * is 1 additional bit set beyond what can be represented in the return
+ * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
+ * to determine if the sparsebit array has any bits set.
+ */
+sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
+{
+ return s->num_set;
+}
+
+/* Returns whether any bit is set in the sparsebit array. */
+bool sparsebit_any_set(struct sparsebit *s)
+{
+ /*
+ * Nodes only describe set bits. If any nodes then there
+ * is at least 1 bit set.
+ */
+ if (!s->root)
+ return false;
+
+ /*
+ * Every node should have a non-zero mask. For now will
+ * just assure that the root node has a non-zero mask,
+ * which is a quick check that at least 1 bit is set.
+ */
+ assert(s->root->mask != 0);
+ assert(s->num_set > 0 ||
+ (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS &&
+ s->root->mask == ~(mask_t) 0));
+
+ return true;
+}
+
+/* Returns whether all the bits in the sparsebit array are cleared. */
+bool sparsebit_all_clear(struct sparsebit *s)
+{
+ return !sparsebit_any_set(s);
+}
+
+/* Returns whether all the bits in the sparsebit array are set. */
+bool sparsebit_any_clear(struct sparsebit *s)
+{
+ return !sparsebit_all_set(s);
+}
+
+/* Returns the index of the first set bit. Abort if no bits are set.
+ */
+sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
+{
+ struct node *nodep;
+
+ /* Validate at least 1 bit is set */
+ assert(sparsebit_any_set(s));
+
+ nodep = node_first(s);
+ return node_first_set(nodep, 0);
+}
+
+/* Returns the index of the first cleared bit. Abort if
+ * no bits are cleared.
+ */
+sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
+{
+ struct node *nodep1, *nodep2;
+
+ /* Validate at least 1 bit is cleared. */
+ assert(sparsebit_any_clear(s));
+
+ /* If no nodes or first node index > 0 then lowest cleared is 0 */
+ nodep1 = node_first(s);
+ if (!nodep1 || nodep1->idx > 0)
+ return 0;
+
+ /* Does the mask in the first node contain any cleared bits. */
+ if (nodep1->mask != ~(mask_t) 0)
+ return node_first_clear(nodep1, 0);
+
+ /*
+ * All mask bits set in first node. If there isn't a second node
+ * then the first cleared bit is the first bit after the bits
+ * described by the first node.
+ */
+ nodep2 = node_next(s, nodep1);
+ if (!nodep2) {
+ /*
+ * No second node. First cleared bit is first bit beyond
+ * bits described by first node.
+ */
+ assert(nodep1->mask == ~(mask_t) 0);
+ assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0);
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+ }
+
+ /*
+ * There is a second node.
+ * If it is not adjacent to the first node, then there is a gap
+ * of cleared bits between the nodes, and the first cleared bit
+ * is the first bit within the gap.
+ */
+ if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+ /*
+ * Second node is adjacent to the first node.
+ * Because it is adjacent, its mask should be non-zero. If all
+ * its mask bits are set, then with it being adjacent, it should
+ * have had the mask bits moved into the num_after setting of the
+ * previous node.
+ */
+ return node_first_clear(nodep2, 0);
+}
+
+/* Returns index of next bit set within s after the index given by prev.
+ * Returns 0 if there are no bits after prev that are set.
+ */
+sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
+ sparsebit_idx_t prev)
+{
+ sparsebit_idx_t lowest_possible = prev + 1;
+ sparsebit_idx_t start;
+ struct node *nodep;
+
+ /* A bit after the highest index can't be set. */
+ if (lowest_possible == 0)
+ return 0;
+
+ /*
+ * Find the leftmost 'candidate' overlapping or to the right
+ * of lowest_possible.
+ */
+ struct node *candidate = NULL;
+
+ /* True iff lowest_possible is within candidate */
+ bool contains = false;
+
+ /*
+ * Find node that describes setting of bit at lowest_possible.
+ * If such a node doesn't exist, find the node with the lowest
+ * starting index that is > lowest_possible.
+ */
+ for (nodep = s->root; nodep;) {
+ if ((nodep->idx + MASK_BITS + nodep->num_after - 1)
+ >= lowest_possible) {
+ candidate = nodep;
+ if (candidate->idx <= lowest_possible) {
+ contains = true;
+ break;
+ }
+ nodep = nodep->left;
+ } else {
+ nodep = nodep->right;
+ }
+ }
+ if (!candidate)
+ return 0;
+
+ assert(candidate->mask != 0);
+
+ /* Does the candidate node describe the setting of lowest_possible? */
+ if (!contains) {
+ /*
+ * Candidate doesn't describe setting of bit at lowest_possible.
+ * Candidate points to the first node with a starting index
+ * > lowest_possible.
+ */
+ assert(candidate->idx > lowest_possible);
+
+ return node_first_set(candidate, 0);
+ }
+
+ /*
+ * Candidate describes setting of bit at lowest_possible.
+ * Note: although the node describes the setting of the bit
+ * at lowest_possible, its possible that its setting and the
+ * setting of all latter bits described by this node are 0.
+ * For now, just handle the cases where this node describes
+ * a bit at or after an index of lowest_possible that is set.
+ */
+ start = lowest_possible - candidate->idx;
+
+ if (start < MASK_BITS && candidate->mask >= (1 << start))
+ return node_first_set(candidate, start);
+
+ if (candidate->num_after) {
+ sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS;
+
+ return lowest_possible < first_num_after_idx
+ ? first_num_after_idx : lowest_possible;
+ }
+
+ /*
+ * Although candidate node describes setting of bit at
+ * the index of lowest_possible, all bits at that index and
+ * latter that are described by candidate are cleared. With
+ * this, the next bit is the first bit in the next node, if
+ * such a node exists. If a next node doesn't exist, then
+ * there is no next set bit.
+ */
+ candidate = node_next(s, candidate);
+ if (!candidate)
+ return 0;
+
+ return node_first_set(candidate, 0);
+}
+
+/* Returns index of next bit cleared within s after the index given by prev.
+ * Returns 0 if there are no bits after prev that are cleared.
+ */
+sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
+ sparsebit_idx_t prev)
+{
+ sparsebit_idx_t lowest_possible = prev + 1;
+ sparsebit_idx_t idx;
+ struct node *nodep1, *nodep2;
+
+ /* A bit after the highest index can't be set. */
+ if (lowest_possible == 0)
+ return 0;
+
+ /*
+ * Does a node describing the setting of lowest_possible exist?
+ * If not, the bit at lowest_possible is cleared.
+ */
+ nodep1 = node_find(s, lowest_possible);
+ if (!nodep1)
+ return lowest_possible;
+
+ /* Does a mask bit in node 1 describe the next cleared bit. */
+ for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++)
+ if (!(nodep1->mask & (1 << idx)))
+ return nodep1->idx + idx;
+
+ /*
+ * Next cleared bit is not described by node 1. If there
+ * isn't a next node, then next cleared bit is described
+ * by bit after the bits described by the first node.
+ */
+ nodep2 = node_next(s, nodep1);
+ if (!nodep2)
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+ /*
+ * There is a second node.
+ * If it is not adjacent to the first node, then there is a gap
+ * of cleared bits between the nodes, and the next cleared bit
+ * is the first bit within the gap.
+ */
+ if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+ /*
+ * Second node is adjacent to the first node.
+ * Because it is adjacent, its mask should be non-zero. If all
+ * its mask bits are set, then with it being adjacent, it should
+ * have had the mask bits moved into the num_after setting of the
+ * previous node.
+ */
+ return node_first_clear(nodep2, 0);
+}
+
+/* Starting with the index 1 greater than the index given by start, finds
+ * and returns the index of the first sequence of num consecutively set
+ * bits. Returns a value of 0 of no such sequence exists.
+ */
+sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ sparsebit_idx_t idx;
+
+ assert(num >= 1);
+
+ for (idx = sparsebit_next_set(s, start);
+ idx != 0 && idx + num - 1 >= idx;
+ idx = sparsebit_next_set(s, idx)) {
+ assert(sparsebit_is_set(s, idx));
+
+ /*
+ * Does the sequence of bits starting at idx consist of
+ * num set bits?
+ */
+ if (sparsebit_is_set_num(s, idx, num))
+ return idx;
+
+ /*
+ * Sequence of set bits at idx isn't large enough.
+ * Skip this entire sequence of set bits.
+ */
+ idx = sparsebit_next_clear(s, idx);
+ if (idx == 0)
+ return 0;
+ }
+
+ return 0;
+}
+
+/* Starting with the index 1 greater than the index given by start, finds
+ * and returns the index of the first sequence of num consecutively cleared
+ * bits. Returns a value of 0 of no such sequence exists.
+ */
+sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ sparsebit_idx_t idx;
+
+ assert(num >= 1);
+
+ for (idx = sparsebit_next_clear(s, start);
+ idx != 0 && idx + num - 1 >= idx;
+ idx = sparsebit_next_clear(s, idx)) {
+ assert(sparsebit_is_clear(s, idx));
+
+ /*
+ * Does the sequence of bits starting at idx consist of
+ * num cleared bits?
+ */
+ if (sparsebit_is_clear_num(s, idx, num))
+ return idx;
+
+ /*
+ * Sequence of cleared bits at idx isn't large enough.
+ * Skip this entire sequence of cleared bits.
+ */
+ idx = sparsebit_next_set(s, idx);
+ if (idx == 0)
+ return 0;
+ }
+
+ return 0;
+}
+
+/* Sets the bits * in the inclusive range idx through idx + num - 1. */
+void sparsebit_set_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ struct node *nodep, *next;
+ unsigned int n1;
+ sparsebit_idx_t idx;
+ sparsebit_num_t n;
+ sparsebit_idx_t middle_start, middle_end;
+
+ assert(num > 0);
+ assert(start + num - 1 >= start);
+
+ /*
+ * Leading - bits before first mask boundary.
+ *
+ * TODO(lhuemill): With some effort it may be possible to
+ * replace the following loop with a sequential sequence
+ * of statements. High level sequence would be:
+ *
+ * 1. Use node_split() to force node that describes setting
+ * of idx to be within the mask portion of a node.
+ * 2. Form mask of bits to be set.
+ * 3. Determine number of mask bits already set in the node
+ * and store in a local variable named num_already_set.
+ * 4. Set the appropriate mask bits within the node.
+ * 5. Increment struct sparsebit_pvt num_set member
+ * by the number of bits that were actually set.
+ * Exclude from the counts bits that were already set.
+ * 6. Before returning to the caller, use node_reduce() to
+ * handle the multiple corner cases that this method
+ * introduces.
+ */
+ for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
+ bit_set(s, idx);
+
+ /* Middle - bits spanning one or more entire mask */
+ middle_start = idx;
+ middle_end = middle_start + (n & -MASK_BITS) - 1;
+ if (n >= MASK_BITS) {
+ nodep = node_split(s, middle_start);
+
+ /*
+ * As needed, split just after end of middle bits.
+ * No split needed if end of middle bits is at highest
+ * supported bit index.
+ */
+ if (middle_end + 1 > middle_end)
+ (void) node_split(s, middle_end + 1);
+
+ /* Delete nodes that only describe bits within the middle. */
+ for (next = node_next(s, nodep);
+ next && (next->idx < middle_end);
+ next = node_next(s, nodep)) {
+ assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
+ node_rm(s, next);
+ next = NULL;
+ }
+
+ /* As needed set each of the mask bits */
+ for (n1 = 0; n1 < MASK_BITS; n1++) {
+ if (!(nodep->mask & (1 << n1))) {
+ nodep->mask |= 1 << n1;
+ s->num_set++;
+ }
+ }
+
+ s->num_set -= nodep->num_after;
+ nodep->num_after = middle_end - middle_start + 1 - MASK_BITS;
+ s->num_set += nodep->num_after;
+
+ node_reduce(s, nodep);
+ }
+ idx = middle_end + 1;
+ n -= middle_end - middle_start + 1;
+
+ /* Trailing - bits at and beyond last mask boundary */
+ assert(n < MASK_BITS);
+ for (; n > 0; idx++, n--)
+ bit_set(s, idx);
+}
+
+/* Clears the bits * in the inclusive range idx through idx + num - 1. */
+void sparsebit_clear_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ struct node *nodep, *next;
+ unsigned int n1;
+ sparsebit_idx_t idx;
+ sparsebit_num_t n;
+ sparsebit_idx_t middle_start, middle_end;
+
+ assert(num > 0);
+ assert(start + num - 1 >= start);
+
+ /* Leading - bits before first mask boundary */
+ for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
+ bit_clear(s, idx);
+
+ /* Middle - bits spanning one or more entire mask */
+ middle_start = idx;
+ middle_end = middle_start + (n & -MASK_BITS) - 1;
+ if (n >= MASK_BITS) {
+ nodep = node_split(s, middle_start);
+
+ /*
+ * As needed, split just after end of middle bits.
+ * No split needed if end of middle bits is at highest
+ * supported bit index.
+ */
+ if (middle_end + 1 > middle_end)
+ (void) node_split(s, middle_end + 1);
+
+ /* Delete nodes that only describe bits within the middle. */
+ for (next = node_next(s, nodep);
+ next && (next->idx < middle_end);
+ next = node_next(s, nodep)) {
+ assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
+ node_rm(s, next);
+ next = NULL;
+ }
+
+ /* As needed clear each of the mask bits */
+ for (n1 = 0; n1 < MASK_BITS; n1++) {
+ if (nodep->mask & (1 << n1)) {
+ nodep->mask &= ~(1 << n1);
+ s->num_set--;
+ }
+ }
+
+ /* Clear any bits described by num_after */
+ s->num_set -= nodep->num_after;
+ nodep->num_after = 0;
+
+ /*
+ * Delete the node that describes the beginning of
+ * the middle bits and perform any allowed reductions
+ * with the nodes prev or next of nodep.
+ */
+ node_reduce(s, nodep);
+ nodep = NULL;
+ }
+ idx = middle_end + 1;
+ n -= middle_end - middle_start + 1;
+
+ /* Trailing - bits at and beyond last mask boundary */
+ assert(n < MASK_BITS);
+ for (; n > 0; idx++, n--)
+ bit_clear(s, idx);
+}
+
+/* Sets the bit at the index given by idx. */
+void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ sparsebit_set_num(s, idx, 1);
+}
+
+/* Clears the bit at the index given by idx. */
+void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ sparsebit_clear_num(s, idx, 1);
+}
+
+/* Sets the bits in the entire addressable range of the sparsebit array. */
+void sparsebit_set_all(struct sparsebit *s)
+{
+ sparsebit_set(s, 0);
+ sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0);
+ assert(sparsebit_all_set(s));
+}
+
+/* Clears the bits in the entire addressable range of the sparsebit array. */
+void sparsebit_clear_all(struct sparsebit *s)
+{
+ sparsebit_clear(s, 0);
+ sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0);
+ assert(!sparsebit_any_set(s));
+}
+
+static size_t display_range(FILE *stream, sparsebit_idx_t low,
+ sparsebit_idx_t high, bool prepend_comma_space)
+{
+ char *fmt_str;
+ size_t sz;
+
+ /* Determine the printf format string */
+ if (low == high)
+ fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx";
+ else
+ fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx";
+
+ /*
+ * When stream is NULL, just determine the size of what would
+ * have been printed, else print the range.
+ */
+ if (!stream)
+ sz = snprintf(NULL, 0, fmt_str, low, high);
+ else
+ sz = fprintf(stream, fmt_str, low, high);
+
+ return sz;
+}
+
+
+/* Dumps to the FILE stream given by stream, the bit settings
+ * of s. Each line of output is prefixed with the number of
+ * spaces given by indent. The length of each line is implementation
+ * dependent and does not depend on the indent amount. The following
+ * is an example output of a sparsebit array that has bits:
+ *
+ * 0x5, 0x8, 0xa:0xe, 0x12
+ *
+ * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18
+ * are set. Note that a ':', instead of a '-' is used to specify a range of
+ * contiguous bits. This is done because '-' is used to specify command-line
+ * options, and sometimes ranges are specified as command-line arguments.
+ */
+void sparsebit_dump(FILE *stream, struct sparsebit *s,
+ unsigned int indent)
+{
+ size_t current_line_len = 0;
+ size_t sz;
+ struct node *nodep;
+
+ if (!sparsebit_any_set(s))
+ return;
+
+ /* Display initial indent */
+ fprintf(stream, "%*s", indent, "");
+
+ /* For each node */
+ for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) {
+ unsigned int n1;
+ sparsebit_idx_t low, high;
+
+ /* For each group of bits in the mask */
+ for (n1 = 0; n1 < MASK_BITS; n1++) {
+ if (nodep->mask & (1 << n1)) {
+ low = high = nodep->idx + n1;
+
+ for (; n1 < MASK_BITS; n1++) {
+ if (nodep->mask & (1 << n1))
+ high = nodep->idx + n1;
+ else
+ break;
+ }
+
+ if ((n1 == MASK_BITS) && nodep->num_after)
+ high += nodep->num_after;
+
+ /*
+ * How much room will it take to display
+ * this range.
+ */
+ sz = display_range(NULL, low, high,
+ current_line_len != 0);
+
+ /*
+ * If there is not enough room, display
+ * a newline plus the indent of the next
+ * line.
+ */
+ if (current_line_len + sz > DUMP_LINE_MAX) {
+ fputs("\n", stream);
+ fprintf(stream, "%*s", indent, "");
+ current_line_len = 0;
+ }
+
+ /* Display the range */
+ sz = display_range(stream, low, high,
+ current_line_len != 0);
+ current_line_len += sz;
+ }
+ }
+
+ /*
+ * If num_after and most significant-bit of mask is not
+ * set, then still need to display a range for the bits
+ * described by num_after.
+ */
+ if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) {
+ low = nodep->idx + MASK_BITS;
+ high = nodep->idx + MASK_BITS + nodep->num_after - 1;
+
+ /*
+ * How much room will it take to display
+ * this range.
+ */
+ sz = display_range(NULL, low, high,
+ current_line_len != 0);
+
+ /*
+ * If there is not enough room, display
+ * a newline plus the indent of the next
+ * line.
+ */
+ if (current_line_len + sz > DUMP_LINE_MAX) {
+ fputs("\n", stream);
+ fprintf(stream, "%*s", indent, "");
+ current_line_len = 0;
+ }
+
+ /* Display the range */
+ sz = display_range(stream, low, high,
+ current_line_len != 0);
+ current_line_len += sz;
+ }
+ }
+ fputs("\n", stream);
+}
+
+/* Validates the internal state of the sparsebit array given by
+ * s. On error, diagnostic information is printed to stderr and
+ * abort is called.
+ */
+void sparsebit_validate_internal(struct sparsebit *s)
+{
+ bool error_detected = false;
+ struct node *nodep, *prev = NULL;
+ sparsebit_num_t total_bits_set = 0;
+ unsigned int n1;
+
+ /* For each node */
+ for (nodep = node_first(s); nodep;
+ prev = nodep, nodep = node_next(s, nodep)) {
+
+ /*
+ * Increase total bits set by the number of bits set
+ * in this node.
+ */
+ for (n1 = 0; n1 < MASK_BITS; n1++)
+ if (nodep->mask & (1 << n1))
+ total_bits_set++;
+
+ total_bits_set += nodep->num_after;
+
+ /*
+ * Arbitrary choice as to whether a mask of 0 is allowed
+ * or not. For diagnostic purposes it is beneficial to
+ * have only one valid means to represent a set of bits.
+ * To support this an arbitrary choice has been made
+ * to not allow a mask of zero.
+ */
+ if (nodep->mask == 0) {
+ fprintf(stderr, "Node mask of zero, "
+ "nodep: %p nodep->mask: 0x%x",
+ nodep, nodep->mask);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * Validate num_after is not greater than the max index
+ * - the number of mask bits. The num_after member
+ * uses 0-based indexing and thus has no value that
+ * represents all bits set. This limitation is handled
+ * by requiring a non-zero mask. With a non-zero mask,
+ * MASK_BITS worth of bits are described by the mask,
+ * which makes the largest needed num_after equal to:
+ *
+ * (~(sparsebit_num_t) 0) - MASK_BITS + 1
+ */
+ if (nodep->num_after
+ > (~(sparsebit_num_t) 0) - MASK_BITS + 1) {
+ fprintf(stderr, "num_after too large, "
+ "nodep: %p nodep->num_after: 0x%lx",
+ nodep, nodep->num_after);
+ error_detected = true;
+ break;
+ }
+
+ /* Validate node index is divisible by the mask size */
+ if (nodep->idx % MASK_BITS) {
+ fprintf(stderr, "Node index not divisible by "
+ "mask size,\n"
+ " nodep: %p nodep->idx: 0x%lx "
+ "MASK_BITS: %lu\n",
+ nodep, nodep->idx, MASK_BITS);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * Validate bits described by node don't wrap beyond the
+ * highest supported index.
+ */
+ if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) {
+ fprintf(stderr, "Bits described by node wrap "
+ "beyond highest supported index,\n"
+ " nodep: %p nodep->idx: 0x%lx\n"
+ " MASK_BITS: %lu nodep->num_after: 0x%lx",
+ nodep, nodep->idx, MASK_BITS, nodep->num_after);
+ error_detected = true;
+ break;
+ }
+
+ /* Check parent pointers. */
+ if (nodep->left) {
+ if (nodep->left->parent != nodep) {
+ fprintf(stderr, "Left child parent pointer "
+ "doesn't point to this node,\n"
+ " nodep: %p nodep->left: %p "
+ "nodep->left->parent: %p",
+ nodep, nodep->left,
+ nodep->left->parent);
+ error_detected = true;
+ break;
+ }
+ }
+
+ if (nodep->right) {
+ if (nodep->right->parent != nodep) {
+ fprintf(stderr, "Right child parent pointer "
+ "doesn't point to this node,\n"
+ " nodep: %p nodep->right: %p "
+ "nodep->right->parent: %p",
+ nodep, nodep->right,
+ nodep->right->parent);
+ error_detected = true;
+ break;
+ }
+ }
+
+ if (!nodep->parent) {
+ if (s->root != nodep) {
+ fprintf(stderr, "Unexpected root node, "
+ "s->root: %p nodep: %p",
+ s->root, nodep);
+ error_detected = true;
+ break;
+ }
+ }
+
+ if (prev) {
+ /*
+ * Is index of previous node before index of
+ * current node?
+ */
+ if (prev->idx >= nodep->idx) {
+ fprintf(stderr, "Previous node index "
+ ">= current node index,\n"
+ " prev: %p prev->idx: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx",
+ prev, prev->idx, nodep, nodep->idx);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * Nodes occur in asscending order, based on each
+ * nodes starting index.
+ */
+ if ((prev->idx + MASK_BITS + prev->num_after - 1)
+ >= nodep->idx) {
+ fprintf(stderr, "Previous node bit range "
+ "overlap with current node bit range,\n"
+ " prev: %p prev->idx: 0x%lx "
+ "prev->num_after: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx "
+ "nodep->num_after: 0x%lx\n"
+ " MASK_BITS: %lu",
+ prev, prev->idx, prev->num_after,
+ nodep, nodep->idx, nodep->num_after,
+ MASK_BITS);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * When the node has all mask bits set, it shouldn't
+ * be adjacent to the last bit described by the
+ * previous node.
+ */
+ if (nodep->mask == ~(mask_t) 0 &&
+ prev->idx + MASK_BITS + prev->num_after == nodep->idx) {
+ fprintf(stderr, "Current node has mask with "
+ "all bits set and is adjacent to the "
+ "previous node,\n"
+ " prev: %p prev->idx: 0x%lx "
+ "prev->num_after: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx "
+ "nodep->num_after: 0x%lx\n"
+ " MASK_BITS: %lu",
+ prev, prev->idx, prev->num_after,
+ nodep, nodep->idx, nodep->num_after,
+ MASK_BITS);
+
+ error_detected = true;
+ break;
+ }
+ }
+ }
+
+ if (!error_detected) {
+ /*
+ * Is sum of bits set in each node equal to the count
+ * of total bits set.
+ */
+ if (s->num_set != total_bits_set) {
+ fprintf(stderr, "Number of bits set missmatch,\n"
+ " s->num_set: 0x%lx total_bits_set: 0x%lx",
+ s->num_set, total_bits_set);
+
+ error_detected = true;
+ }
+ }
+
+ if (error_detected) {
+ fputs(" dump_internal:\n", stderr);
+ sparsebit_dump_internal(stderr, s, 4);
+ abort();
+ }
+}
+
+
+#ifdef FUZZ
+/* A simple but effective fuzzing driver. Look for bugs with the help
+ * of some invariants and of a trivial representation of sparsebit.
+ * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let
+ * afl-fuzz do the magic. :)
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+
+struct range {
+ sparsebit_idx_t first, last;
+ bool set;
+};
+
+struct sparsebit *s;
+struct range ranges[1000];
+int num_ranges;
+
+static bool get_value(sparsebit_idx_t idx)
+{
+ int i;
+
+ for (i = num_ranges; --i >= 0; )
+ if (ranges[i].first <= idx && idx <= ranges[i].last)
+ return ranges[i].set;
+
+ return false;
+}
+
+static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last)
+{
+ sparsebit_num_t num;
+ sparsebit_idx_t next;
+
+ if (first < last) {
+ num = last - first + 1;
+ } else {
+ num = first - last + 1;
+ first = last;
+ last = first + num - 1;
+ }
+
+ switch (code) {
+ case 0:
+ sparsebit_set(s, first);
+ assert(sparsebit_is_set(s, first));
+ assert(!sparsebit_is_clear(s, first));
+ assert(sparsebit_any_set(s));
+ assert(!sparsebit_all_clear(s));
+ if (get_value(first))
+ return;
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = first, .set = true };
+ break;
+ case 1:
+ sparsebit_clear(s, first);
+ assert(!sparsebit_is_set(s, first));
+ assert(sparsebit_is_clear(s, first));
+ assert(sparsebit_any_clear(s));
+ assert(!sparsebit_all_set(s));
+ if (!get_value(first))
+ return;
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = first, .set = false };
+ break;
+ case 2:
+ assert(sparsebit_is_set(s, first) == get_value(first));
+ assert(sparsebit_is_clear(s, first) == !get_value(first));
+ break;
+ case 3:
+ if (sparsebit_any_set(s))
+ assert(get_value(sparsebit_first_set(s)));
+ if (sparsebit_any_clear(s))
+ assert(!get_value(sparsebit_first_clear(s)));
+ sparsebit_set_all(s);
+ assert(!sparsebit_any_clear(s));
+ assert(sparsebit_all_set(s));
+ num_ranges = 0;
+ ranges[num_ranges++] = (struct range)
+ { .first = 0, .last = ~(sparsebit_idx_t)0, .set = true };
+ break;
+ case 4:
+ if (sparsebit_any_set(s))
+ assert(get_value(sparsebit_first_set(s)));
+ if (sparsebit_any_clear(s))
+ assert(!get_value(sparsebit_first_clear(s)));
+ sparsebit_clear_all(s);
+ assert(!sparsebit_any_set(s));
+ assert(sparsebit_all_clear(s));
+ num_ranges = 0;
+ break;
+ case 5:
+ next = sparsebit_next_set(s, first);
+ assert(next == 0 || next > first);
+ assert(next == 0 || get_value(next));
+ break;
+ case 6:
+ next = sparsebit_next_clear(s, first);
+ assert(next == 0 || next > first);
+ assert(next == 0 || !get_value(next));
+ break;
+ case 7:
+ next = sparsebit_next_clear(s, first);
+ if (sparsebit_is_set_num(s, first, num)) {
+ assert(next == 0 || next > last);
+ if (first)
+ next = sparsebit_next_set(s, first - 1);
+ else if (sparsebit_any_set(s))
+ next = sparsebit_first_set(s);
+ else
+ return;
+ assert(next == first);
+ } else {
+ assert(sparsebit_is_clear(s, first) || next <= last);
+ }
+ break;
+ case 8:
+ next = sparsebit_next_set(s, first);
+ if (sparsebit_is_clear_num(s, first, num)) {
+ assert(next == 0 || next > last);
+ if (first)
+ next = sparsebit_next_clear(s, first - 1);
+ else if (sparsebit_any_clear(s))
+ next = sparsebit_first_clear(s);
+ else
+ return;
+ assert(next == first);
+ } else {
+ assert(sparsebit_is_set(s, first) || next <= last);
+ }
+ break;
+ case 9:
+ sparsebit_set_num(s, first, num);
+ assert(sparsebit_is_set_num(s, first, num));
+ assert(!sparsebit_is_clear_num(s, first, num));
+ assert(sparsebit_any_set(s));
+ assert(!sparsebit_all_clear(s));
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = last, .set = true };
+ break;
+ case 10:
+ sparsebit_clear_num(s, first, num);
+ assert(!sparsebit_is_set_num(s, first, num));
+ assert(sparsebit_is_clear_num(s, first, num));
+ assert(sparsebit_any_clear(s));
+ assert(!sparsebit_all_set(s));
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = last, .set = false };
+ break;
+ case 11:
+ sparsebit_validate_internal(s);
+ break;
+ default:
+ break;
+ }
+}
+
+unsigned char get8(void)
+{
+ int ch;
+
+ ch = getchar();
+ if (ch == EOF)
+ exit(0);
+ return ch;
+}
+
+uint64_t get64(void)
+{
+ uint64_t x;
+
+ x = get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ return (x << 8) | get8();
+}
+
+int main(void)
+{
+ s = sparsebit_alloc();
+ for (;;) {
+ uint8_t op = get8() & 0xf;
+ uint64_t first = get64();
+ uint64_t last = get64();
+
+ operate(op, first, last);
+ }
+}
+#endif
diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/vmx.c
new file mode 100644
index 000000000000..0231bc0aae7b
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/vmx.c
@@ -0,0 +1,243 @@
+/*
+ * tools/testing/selftests/kvm/lib/x86.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+#include "vmx.h"
+
+/* Create a default VM for VMX tests.
+ *
+ * Input Args:
+ * vcpuid - The id of the single VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ */
+struct kvm_vm *
+vm_create_default_vmx(uint32_t vcpuid, vmx_guest_code_t guest_code)
+{
+ struct kvm_cpuid2 *cpuid;
+ struct kvm_vm *vm;
+ vm_vaddr_t vmxon_vaddr;
+ vm_paddr_t vmxon_paddr;
+ vm_vaddr_t vmcs_vaddr;
+ vm_paddr_t vmcs_paddr;
+
+ vm = vm_create_default(vcpuid, (void *) guest_code);
+
+ /* Enable nesting in CPUID */
+ vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
+
+ /* Setup of a region of guest memory for the vmxon region. */
+ vmxon_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0);
+ vmxon_paddr = addr_gva2gpa(vm, vmxon_vaddr);
+
+ /* Setup of a region of guest memory for a vmcs. */
+ vmcs_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0);
+ vmcs_paddr = addr_gva2gpa(vm, vmcs_vaddr);
+
+ vcpu_args_set(vm, vcpuid, 4, vmxon_vaddr, vmxon_paddr, vmcs_vaddr,
+ vmcs_paddr);
+
+ return vm;
+}
+
+void prepare_for_vmx_operation(void)
+{
+ uint64_t feature_control;
+ uint64_t required;
+ unsigned long cr0;
+ unsigned long cr4;
+
+ /*
+ * Ensure bits in CR0 and CR4 are valid in VMX operation:
+ * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx.
+ * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx.
+ */
+ __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory");
+ cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1);
+ cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0);
+ __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory");
+
+ __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory");
+ cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1);
+ cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0);
+ /* Enable VMX operation */
+ cr4 |= X86_CR4_VMXE;
+ __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory");
+
+ /*
+ * Configure IA32_FEATURE_CONTROL MSR to allow VMXON:
+ * Bit 0: Lock bit. If clear, VMXON causes a #GP.
+ * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
+ * outside of SMX causes a #GP.
+ */
+ required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+ required |= FEATURE_CONTROL_LOCKED;
+ feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+ if ((feature_control & required) != required)
+ wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required);
+}
+
+/*
+ * Initialize the control fields to the most basic settings possible.
+ */
+static inline void init_vmcs_control_fields(void)
+{
+ vmwrite(VIRTUAL_PROCESSOR_ID, 0);
+ vmwrite(POSTED_INTR_NV, 0);
+
+ vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PINBASED_CTLS));
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PROCBASED_CTLS));
+ vmwrite(EXCEPTION_BITMAP, 0);
+ vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+ vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
+ vmwrite(CR3_TARGET_COUNT, 0);
+ vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) |
+ VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */
+ vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+ vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) |
+ VM_ENTRY_IA32E_MODE); /* 64-bit guest */
+ vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+ vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
+ vmwrite(TPR_THRESHOLD, 0);
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, 0);
+
+ vmwrite(CR0_GUEST_HOST_MASK, 0);
+ vmwrite(CR4_GUEST_HOST_MASK, 0);
+ vmwrite(CR0_READ_SHADOW, get_cr0());
+ vmwrite(CR4_READ_SHADOW, get_cr4());
+}
+
+/*
+ * Initialize the host state fields based on the current host state, with
+ * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch
+ * or vmresume.
+ */
+static inline void init_vmcs_host_state(void)
+{
+ uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);
+
+ vmwrite(HOST_ES_SELECTOR, get_es());
+ vmwrite(HOST_CS_SELECTOR, get_cs());
+ vmwrite(HOST_SS_SELECTOR, get_ss());
+ vmwrite(HOST_DS_SELECTOR, get_ds());
+ vmwrite(HOST_FS_SELECTOR, get_fs());
+ vmwrite(HOST_GS_SELECTOR, get_gs());
+ vmwrite(HOST_TR_SELECTOR, get_tr());
+
+ if (exit_controls & VM_EXIT_LOAD_IA32_PAT)
+ vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT));
+ if (exit_controls & VM_EXIT_LOAD_IA32_EFER)
+ vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER));
+ if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+ vmwrite(HOST_IA32_PERF_GLOBAL_CTRL,
+ rdmsr(MSR_CORE_PERF_GLOBAL_CTRL));
+
+ vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS));
+
+ vmwrite(HOST_CR0, get_cr0());
+ vmwrite(HOST_CR3, get_cr3());
+ vmwrite(HOST_CR4, get_cr4());
+ vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
+ vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
+ vmwrite(HOST_TR_BASE,
+ get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr())));
+ vmwrite(HOST_GDTR_BASE, get_gdt_base());
+ vmwrite(HOST_IDTR_BASE, get_idt_base());
+ vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
+ vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
+}
+
+/*
+ * Initialize the guest state fields essentially as a clone of
+ * the host state fields. Some host state fields have fixed
+ * values, and we set the corresponding guest state fields accordingly.
+ */
+static inline void init_vmcs_guest_state(void *rip, void *rsp)
+{
+ vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR));
+ vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR));
+ vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR));
+ vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR));
+ vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR));
+ vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR));
+ vmwrite(GUEST_LDTR_SELECTOR, 0);
+ vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR));
+ vmwrite(GUEST_INTR_STATUS, 0);
+ vmwrite(GUEST_PML_INDEX, 0);
+
+ vmwrite(VMCS_LINK_POINTER, -1ll);
+ vmwrite(GUEST_IA32_DEBUGCTL, 0);
+ vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT));
+ vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER));
+ vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL,
+ vmreadz(HOST_IA32_PERF_GLOBAL_CTRL));
+
+ vmwrite(GUEST_ES_LIMIT, -1);
+ vmwrite(GUEST_CS_LIMIT, -1);
+ vmwrite(GUEST_SS_LIMIT, -1);
+ vmwrite(GUEST_DS_LIMIT, -1);
+ vmwrite(GUEST_FS_LIMIT, -1);
+ vmwrite(GUEST_GS_LIMIT, -1);
+ vmwrite(GUEST_LDTR_LIMIT, -1);
+ vmwrite(GUEST_TR_LIMIT, 0x67);
+ vmwrite(GUEST_GDTR_LIMIT, 0xffff);
+ vmwrite(GUEST_IDTR_LIMIT, 0xffff);
+ vmwrite(GUEST_ES_AR_BYTES,
+ vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
+ vmwrite(GUEST_SS_AR_BYTES, 0xc093);
+ vmwrite(GUEST_DS_AR_BYTES,
+ vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_FS_AR_BYTES,
+ vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_GS_AR_BYTES,
+ vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_LDTR_AR_BYTES, 0x10000);
+ vmwrite(GUEST_TR_AR_BYTES, 0x8b);
+ vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+ vmwrite(GUEST_ACTIVITY_STATE, 0);
+ vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS));
+ vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0);
+
+ vmwrite(GUEST_CR0, vmreadz(HOST_CR0));
+ vmwrite(GUEST_CR3, vmreadz(HOST_CR3));
+ vmwrite(GUEST_CR4, vmreadz(HOST_CR4));
+ vmwrite(GUEST_ES_BASE, 0);
+ vmwrite(GUEST_CS_BASE, 0);
+ vmwrite(GUEST_SS_BASE, 0);
+ vmwrite(GUEST_DS_BASE, 0);
+ vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE));
+ vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE));
+ vmwrite(GUEST_LDTR_BASE, 0);
+ vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE));
+ vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
+ vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
+ vmwrite(GUEST_DR7, 0x400);
+ vmwrite(GUEST_RSP, (uint64_t)rsp);
+ vmwrite(GUEST_RIP, (uint64_t)rip);
+ vmwrite(GUEST_RFLAGS, 2);
+ vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+ vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
+ vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
+}
+
+void prepare_vmcs(void *guest_rip, void *guest_rsp)
+{
+ init_vmcs_control_fields();
+ init_vmcs_host_state();
+ init_vmcs_guest_state(guest_rip, guest_rsp);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c
new file mode 100644
index 000000000000..2f17675f4275
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86.c
@@ -0,0 +1,700 @@
+/*
+ * tools/testing/selftests/kvm/lib/x86.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+#include "x86.h"
+
+/* Minimum physical address used for virtual translation tables. */
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+/* Virtual translation table structure declarations */
+struct pageMapL4Entry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t ignored_06:1;
+ uint64_t page_size:1;
+ uint64_t ignored_11_08:4;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+struct pageDirectoryPointerEntry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t ignored_06:1;
+ uint64_t page_size:1;
+ uint64_t ignored_11_08:4;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+struct pageDirectoryEntry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t ignored_06:1;
+ uint64_t page_size:1;
+ uint64_t ignored_11_08:4;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+struct pageTableEntry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t dirty:1;
+ uint64_t reserved_07:1;
+ uint64_t global:1;
+ uint64_t ignored_11_09:3;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+/* Register Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * regs - register
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the registers given by regs, to the FILE stream
+ * given by steam.
+ */
+void regs_dump(FILE *stream, struct kvm_regs *regs,
+ uint8_t indent)
+{
+ fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
+ "rcx: 0x%.16llx rdx: 0x%.16llx\n",
+ indent, "",
+ regs->rax, regs->rbx, regs->rcx, regs->rdx);
+ fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
+ "rsp: 0x%.16llx rbp: 0x%.16llx\n",
+ indent, "",
+ regs->rsi, regs->rdi, regs->rsp, regs->rbp);
+ fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx "
+ "r10: 0x%.16llx r11: 0x%.16llx\n",
+ indent, "",
+ regs->r8, regs->r9, regs->r10, regs->r11);
+ fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
+ "r14: 0x%.16llx r15: 0x%.16llx\n",
+ indent, "",
+ regs->r12, regs->r13, regs->r14, regs->r15);
+ fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
+ indent, "",
+ regs->rip, regs->rflags);
+}
+
+/* Segment Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * segment - KVM segment
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the KVM segment given by segment, to the FILE stream
+ * given by steam.
+ */
+static void segment_dump(FILE *stream, struct kvm_segment *segment,
+ uint8_t indent)
+{
+ fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
+ "selector: 0x%.4x type: 0x%.2x\n",
+ indent, "", segment->base, segment->limit,
+ segment->selector, segment->type);
+ fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
+ "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
+ indent, "", segment->present, segment->dpl,
+ segment->db, segment->s, segment->l);
+ fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
+ "unusable: 0x%.2x padding: 0x%.2x\n",
+ indent, "", segment->g, segment->avl,
+ segment->unusable, segment->padding);
+}
+
+/* dtable Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * dtable - KVM dtable
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the KVM dtable given by dtable, to the FILE stream
+ * given by steam.
+ */
+static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
+ uint8_t indent)
+{
+ fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
+ "padding: 0x%.4x 0x%.4x 0x%.4x\n",
+ indent, "", dtable->base, dtable->limit,
+ dtable->padding[0], dtable->padding[1], dtable->padding[2]);
+}
+
+/* System Register Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * sregs - System registers
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the system registers given by sregs, to the FILE stream
+ * given by steam.
+ */
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
+ uint8_t indent)
+{
+ unsigned int i;
+
+ fprintf(stream, "%*scs:\n", indent, "");
+ segment_dump(stream, &sregs->cs, indent + 2);
+ fprintf(stream, "%*sds:\n", indent, "");
+ segment_dump(stream, &sregs->ds, indent + 2);
+ fprintf(stream, "%*ses:\n", indent, "");
+ segment_dump(stream, &sregs->es, indent + 2);
+ fprintf(stream, "%*sfs:\n", indent, "");
+ segment_dump(stream, &sregs->fs, indent + 2);
+ fprintf(stream, "%*sgs:\n", indent, "");
+ segment_dump(stream, &sregs->gs, indent + 2);
+ fprintf(stream, "%*sss:\n", indent, "");
+ segment_dump(stream, &sregs->ss, indent + 2);
+ fprintf(stream, "%*str:\n", indent, "");
+ segment_dump(stream, &sregs->tr, indent + 2);
+ fprintf(stream, "%*sldt:\n", indent, "");
+ segment_dump(stream, &sregs->ldt, indent + 2);
+
+ fprintf(stream, "%*sgdt:\n", indent, "");
+ dtable_dump(stream, &sregs->gdt, indent + 2);
+ fprintf(stream, "%*sidt:\n", indent, "");
+ dtable_dump(stream, &sregs->idt, indent + 2);
+
+ fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
+ "cr3: 0x%.16llx cr4: 0x%.16llx\n",
+ indent, "",
+ sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
+ fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
+ "apic_base: 0x%.16llx\n",
+ indent, "",
+ sregs->cr8, sregs->efer, sregs->apic_base);
+
+ fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
+ for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
+ fprintf(stream, "%*s%.16llx\n", indent + 2, "",
+ sregs->interrupt_bitmap[i]);
+ }
+}
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+{
+ int rc;
+
+ TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ /* If needed, create page map l4 table. */
+ if (!vm->pgd_created) {
+ vm_paddr_t paddr = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+ vm->pgd = paddr;
+
+ /* Set pointer to pgd tables in all the VCPUs that
+ * have already been created. Future VCPUs will have
+ * the value set as each one is created.
+ */
+ for (struct vcpu *vcpu = vm->vcpu_head; vcpu;
+ vcpu = vcpu->next) {
+ struct kvm_sregs sregs;
+
+ /* Obtain the current system register settings */
+ vcpu_sregs_get(vm, vcpu->id, &sregs);
+
+ /* Set and store the pointer to the start of the
+ * pgd tables.
+ */
+ sregs.cr3 = vm->pgd;
+ vcpu_sregs_set(vm, vcpu->id, &sregs);
+ }
+
+ vm->pgd_created = true;
+ }
+}
+
+/* VM Virtual Page Map
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vaddr - VM Virtual Address
+ * paddr - VM Physical Address
+ * pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a virtual translation for the page
+ * starting at vaddr to the page starting at paddr.
+ */
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint32_t pgd_memslot)
+{
+ uint16_t index[4];
+ struct pageMapL4Entry *pml4e;
+
+ TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ " vaddr: 0x%lx vm->page_size: 0x%x",
+ vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx",
+ vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ index[0] = (vaddr >> 12) & 0x1ffu;
+ index[1] = (vaddr >> 21) & 0x1ffu;
+ index[2] = (vaddr >> 30) & 0x1ffu;
+ index[3] = (vaddr >> 39) & 0x1ffu;
+
+ /* Allocate page directory pointer table if not present. */
+ pml4e = addr_gpa2hva(vm, vm->pgd);
+ if (!pml4e[index[3]].present) {
+ pml4e[index[3]].address = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+ >> vm->page_shift;
+ pml4e[index[3]].writable = true;
+ pml4e[index[3]].present = true;
+ }
+
+ /* Allocate page directory table if not present. */
+ struct pageDirectoryPointerEntry *pdpe;
+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+ if (!pdpe[index[2]].present) {
+ pdpe[index[2]].address = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+ >> vm->page_shift;
+ pdpe[index[2]].writable = true;
+ pdpe[index[2]].present = true;
+ }
+
+ /* Allocate page table if not present. */
+ struct pageDirectoryEntry *pde;
+ pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+ if (!pde[index[1]].present) {
+ pde[index[1]].address = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+ >> vm->page_shift;
+ pde[index[1]].writable = true;
+ pde[index[1]].present = true;
+ }
+
+ /* Fill in page table entry. */
+ struct pageTableEntry *pte;
+ pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+ pte[index[0]].address = paddr >> vm->page_shift;
+ pte[index[0]].writable = true;
+ pte[index[0]].present = 1;
+}
+
+/* Virtual Translation Tables Dump
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * indent - Left margin indent amount
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by stream, the contents of all the
+ * virtual translation tables for the VM given by vm.
+ */
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ struct pageMapL4Entry *pml4e, *pml4e_start;
+ struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
+ struct pageDirectoryEntry *pde, *pde_start;
+ struct pageTableEntry *pte, *pte_start;
+
+ if (!vm->pgd_created)
+ return;
+
+ fprintf(stream, "%*s "
+ " no\n", indent, "");
+ fprintf(stream, "%*s index hvaddr gpaddr "
+ "addr w exec dirty\n",
+ indent, "");
+ pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
+ vm->pgd);
+ for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
+ pml4e = &pml4e_start[n1];
+ if (!pml4e->present)
+ continue;
+ fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+ " %u\n",
+ indent, "",
+ pml4e - pml4e_start, pml4e,
+ addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
+ pml4e->writable, pml4e->execute_disable);
+
+ pdpe_start = addr_gpa2hva(vm, pml4e->address
+ * vm->page_size);
+ for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
+ pdpe = &pdpe_start[n2];
+ if (!pdpe->present)
+ continue;
+ fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
+ "%u %u\n",
+ indent, "",
+ pdpe - pdpe_start, pdpe,
+ addr_hva2gpa(vm, pdpe),
+ (uint64_t) pdpe->address, pdpe->writable,
+ pdpe->execute_disable);
+
+ pde_start = addr_gpa2hva(vm,
+ pdpe->address * vm->page_size);
+ for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
+ pde = &pde_start[n3];
+ if (!pde->present)
+ continue;
+ fprintf(stream, "%*spde 0x%-3zx %p "
+ "0x%-12lx 0x%-10lx %u %u\n",
+ indent, "", pde - pde_start, pde,
+ addr_hva2gpa(vm, pde),
+ (uint64_t) pde->address, pde->writable,
+ pde->execute_disable);
+
+ pte_start = addr_gpa2hva(vm,
+ pde->address * vm->page_size);
+ for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
+ pte = &pte_start[n4];
+ if (!pte->present)
+ continue;
+ fprintf(stream, "%*spte 0x%-3zx %p "
+ "0x%-12lx 0x%-10lx %u %u "
+ " %u 0x%-10lx\n",
+ indent, "",
+ pte - pte_start, pte,
+ addr_hva2gpa(vm, pte),
+ (uint64_t) pte->address,
+ pte->writable,
+ pte->execute_disable,
+ pte->dirty,
+ ((uint64_t) n1 << 27)
+ | ((uint64_t) n2 << 18)
+ | ((uint64_t) n3 << 9)
+ | ((uint64_t) n4));
+ }
+ }
+ }
+ }
+}
+
+/* Set Unusable Segment
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ * segp - Pointer to segment register
+ *
+ * Return: None
+ *
+ * Sets the segment register pointed to by segp to an unusable state.
+ */
+static void kvm_seg_set_unusable(struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->unusable = true;
+}
+
+/* Set Long Mode Flat Kernel Code Segment
+ *
+ * Input Args:
+ * selector - selector value
+ *
+ * Output Args:
+ * segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Sets up the KVM segment pointed to by segp, to be a code segment
+ * with the selector value given by selector.
+ */
+static void kvm_seg_set_kernel_code_64bit(uint16_t selector,
+ struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->selector = selector;
+ segp->limit = 0xFFFFFFFFu;
+ segp->s = 0x1; /* kTypeCodeData */
+ segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
+ * | kFlagCodeReadable
+ */
+ segp->g = true;
+ segp->l = true;
+ segp->present = 1;
+}
+
+/* Set Long Mode Flat Kernel Data Segment
+ *
+ * Input Args:
+ * selector - selector value
+ *
+ * Output Args:
+ * segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Sets up the KVM segment pointed to by segp, to be a data segment
+ * with the selector value given by selector.
+ */
+static void kvm_seg_set_kernel_data_64bit(uint16_t selector,
+ struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->selector = selector;
+ segp->limit = 0xFFFFFFFFu;
+ segp->s = 0x1; /* kTypeCodeData */
+ segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
+ * | kFlagDataWritable
+ */
+ segp->g = true;
+ segp->present = true;
+}
+
+/* Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gpa - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Translates the VM virtual address given by gva to a VM physical
+ * address and then locates the memory region containing the VM
+ * physical address, within the VM given by vm. When found, the host
+ * virtual address providing the memory to the vm physical address is returned.
+ * A TEST_ASSERT failure occurs if no region containing translated
+ * VM virtual address exists.
+ */
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint16_t index[4];
+ struct pageMapL4Entry *pml4e;
+ struct pageDirectoryPointerEntry *pdpe;
+ struct pageDirectoryEntry *pde;
+ struct pageTableEntry *pte;
+ void *hva;
+
+ TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ index[0] = (gva >> 12) & 0x1ffu;
+ index[1] = (gva >> 21) & 0x1ffu;
+ index[2] = (gva >> 30) & 0x1ffu;
+ index[3] = (gva >> 39) & 0x1ffu;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+ pml4e = addr_gpa2hva(vm, vm->pgd);
+ if (!pml4e[index[3]].present)
+ goto unmapped_gva;
+
+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+ if (!pdpe[index[2]].present)
+ goto unmapped_gva;
+
+ pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+ if (!pde[index[1]].present)
+ goto unmapped_gva;
+
+ pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+ if (!pte[index[0]].present)
+ goto unmapped_gva;
+
+ return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
+
+unmapped_gva:
+ TEST_ASSERT(false, "No mapping for vm virtual address, "
+ "gva: 0x%lx", gva);
+}
+
+void vcpu_setup(struct kvm_vm *vm, int vcpuid)
+{
+ struct kvm_sregs sregs;
+
+ /* Set mode specific system register values. */
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+
+ switch (vm->mode) {
+ case VM_MODE_FLAT48PG:
+ sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
+ sregs.cr4 |= X86_CR4_PAE;
+ sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+
+ kvm_seg_set_unusable(&sregs.ldt);
+ kvm_seg_set_kernel_code_64bit(0x8, &sregs.cs);
+ kvm_seg_set_kernel_data_64bit(0x10, &sregs.ds);
+ kvm_seg_set_kernel_data_64bit(0x10, &sregs.es);
+ break;
+
+ default:
+ TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+ vcpu_sregs_set(vm, vcpuid, &sregs);
+
+ /* If virtual translation table have been setup, set system register
+ * to point to the tables. It's okay if they haven't been setup yet,
+ * in that the code that sets up the virtual translation tables, will
+ * go back through any VCPUs that have already been created and set
+ * their values.
+ */
+ if (vm->pgd_created) {
+ struct kvm_sregs sregs;
+
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+
+ sregs.cr3 = vm->pgd;
+ vcpu_sregs_set(vm, vcpuid, &sregs);
+ }
+}
+/* Adds a vCPU with reasonable defaults (i.e., a stack)
+ *
+ * Input Args:
+ * vcpuid - The id of the VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ */
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+ struct kvm_mp_state mp_state;
+ struct kvm_regs regs;
+ vm_vaddr_t stack_vaddr;
+ stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+ DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+
+ /* Create VCPU */
+ vm_vcpu_add(vm, vcpuid);
+
+ /* Setup guest general purpose registers */
+ vcpu_regs_get(vm, vcpuid, &regs);
+ regs.rflags = regs.rflags | 0x2;
+ regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
+ regs.rip = (unsigned long) guest_code;
+ vcpu_regs_set(vm, vcpuid, &regs);
+
+ /* Setup the MP state */
+ mp_state.mp_state = 0;
+ vcpu_set_mp_state(vm, vcpuid, &mp_state);
+}
+
+/* VM VCPU CPUID Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU id
+ * cpuid - The CPUID values to set.
+ *
+ * Output Args: None
+ *
+ * Return: void
+ *
+ * Set the VCPU's CPUID.
+ */
+void vcpu_set_cpuid(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int rc;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
+ TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
+ rc, errno);
+
+}
+/* Create a VM with reasonable defaults
+ *
+ * Input Args:
+ * vcpuid - The id of the single VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code)
+{
+ struct kvm_vm *vm;
+
+ /* Create VM */
+ vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+
+ /* Setup guest code */
+ kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+
+ /* Setup IRQ Chip */
+ vm_create_irqchip(vm);
+
+ /* Add the first vCPU. */
+ vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+ return vm;
+}
diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/set_sregs_test.c
new file mode 100644
index 000000000000..090fd3f19352
--- /dev/null
+++ b/tools/testing/selftests/kvm/set_sregs_test.c
@@ -0,0 +1,54 @@
+/*
+ * KVM_SET_SREGS tests
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * This is a regression test for the bug fixed by the following commit:
+ * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
+ *
+ * That bug allowed a user-mode program that called the KVM_SET_SREGS
+ * ioctl to put a VCPU's local APIC into an invalid state.
+ *
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID 5
+
+int main(int argc, char *argv[])
+{
+ struct kvm_sregs sregs;
+ struct kvm_vm *vm;
+ int rc;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, NULL);
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ sregs.apic_base = 1 << 10;
+ rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
+ sregs.apic_base);
+ sregs.apic_base = 1 << 11;
+ rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
+ sregs.apic_base);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c
new file mode 100644
index 000000000000..eae1ece3c31b
--- /dev/null
+++ b/tools/testing/selftests/kvm/sync_regs_test.c
@@ -0,0 +1,254 @@
+/*
+ * Test for x86 KVM_CAP_SYNC_REGS
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality,
+ * including requesting an invalid register set, updates to/from values
+ * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID 5
+#define PORT_HOST_SYNC 0x1000
+
+static void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
+{
+ __asm__ __volatile__("in %[port], %%al"
+ :
+ : [port]"d"(port), "D"(arg0), "S"(arg1)
+ : "rax");
+}
+
+#define exit_to_l0(_port, _arg0, _arg1) \
+ __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
+
+#define GUEST_ASSERT(_condition) do { \
+ if (!(_condition)) \
+ exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition, 0);\
+} while (0)
+
+void guest_code(void)
+{
+ for (;;) {
+ exit_to_l0(PORT_HOST_SYNC, "hello", 0);
+ asm volatile ("inc %r11");
+ }
+}
+
+static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
+{
+#define REG_COMPARE(reg) \
+ TEST_ASSERT(left->reg == right->reg, \
+ "Register " #reg \
+ " values did not match: 0x%llx, 0x%llx\n", \
+ left->reg, right->reg)
+ REG_COMPARE(rax);
+ REG_COMPARE(rbx);
+ REG_COMPARE(rcx);
+ REG_COMPARE(rdx);
+ REG_COMPARE(rsi);
+ REG_COMPARE(rdi);
+ REG_COMPARE(rsp);
+ REG_COMPARE(rbp);
+ REG_COMPARE(r8);
+ REG_COMPARE(r9);
+ REG_COMPARE(r10);
+ REG_COMPARE(r11);
+ REG_COMPARE(r12);
+ REG_COMPARE(r13);
+ REG_COMPARE(r14);
+ REG_COMPARE(r15);
+ REG_COMPARE(rip);
+ REG_COMPARE(rflags);
+#undef REG_COMPARE
+}
+
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right)
+{
+}
+
+static void compare_vcpu_events(struct kvm_vcpu_events *left,
+ struct kvm_vcpu_events *right)
+{
+}
+
+#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
+#define INVALID_SYNC_FIELD 0x80000000
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+ struct kvm_vcpu_events events;
+ int rv, cap;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+ if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
+ fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+ if ((cap & INVALID_SYNC_FIELD) != 0) {
+ fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, guest_code);
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ /* Request reading invalid register set from VCPU. */
+ run->kvm_valid_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+ run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+ /* Request setting invalid register set into VCPU. */
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+ /* Request and verify all valid register sets. */
+ /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ compare_regs(&regs, &run->s.regs.regs);
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ compare_sregs(&sregs, &run->s.regs.sregs);
+
+ vcpu_events_get(vm, VCPU_ID, &events);
+ compare_vcpu_events(&events, &run->s.regs.events);
+
+ /* Set and verify various register values. */
+ run->s.regs.regs.r11 = 0xBAD1DEA;
+ run->s.regs.sregs.apic_base = 1 << 11;
+ /* TODO run->s.regs.events.XYZ = ABC; */
+
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 == 0xBAD1DEA + 1,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+ TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11,
+ "apic_base sync regs value incorrect 0x%llx.",
+ run->s.regs.sregs.apic_base);
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ compare_regs(&regs, &run->s.regs.regs);
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ compare_sregs(&sregs, &run->s.regs.sregs);
+
+ vcpu_events_get(vm, VCPU_ID, &events);
+ compare_vcpu_events(&events, &run->s.regs.events);
+
+ /* Clear kvm_dirty_regs bits, verify new s.regs values are
+ * overwritten with existing guest values.
+ */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ run->kvm_dirty_regs = 0;
+ run->s.regs.regs.r11 = 0xDEADBEEF;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 != 0xDEADBEEF,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+
+ /* Clear kvm_valid_regs bits and kvm_dirty_bits.
+ * Verify s.regs values are not overwritten with existing guest values
+ * and that guest values are not overwritten with kvm_sync_regs values.
+ */
+ run->kvm_valid_regs = 0;
+ run->kvm_dirty_regs = 0;
+ run->s.regs.regs.r11 = 0xAAAA;
+ regs.r11 = 0xBAC0;
+ vcpu_regs_set(vm, VCPU_ID, &regs);
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 == 0xAAAA,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ TEST_ASSERT(regs.r11 == 0xBAC0 + 1,
+ "r11 guest value incorrect 0x%llx.",
+ regs.r11);
+
+ /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
+ * with existing guest values but that guest values are overwritten
+ * with kvm_sync_regs values.
+ */
+ run->kvm_valid_regs = 0;
+ run->kvm_dirty_regs = TEST_SYNC_FIELDS;
+ run->s.regs.regs.r11 = 0xBBBB;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 == 0xBBBB,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ TEST_ASSERT(regs.r11 == 0xBBBB + 1,
+ "r11 guest value incorrect 0x%llx.",
+ regs.r11);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
new file mode 100644
index 000000000000..d7cb7944a42e
--- /dev/null
+++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
@@ -0,0 +1,233 @@
+/*
+ * gtests/tests/vmx_tsc_adjust_test.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ *
+ * IA32_TSC_ADJUST test
+ *
+ * According to the SDM, "if an execution of WRMSR to the
+ * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC,
+ * the logical processor also adds (or subtracts) value X from the
+ * IA32_TSC_ADJUST MSR.
+ *
+ * Note that when L1 doesn't intercept writes to IA32_TSC, a
+ * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC
+ * value.
+ *
+ * This test verifies that this unusual case is handled correctly.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "../kselftest.h"
+
+#ifndef MSR_IA32_TSC_ADJUST
+#define MSR_IA32_TSC_ADJUST 0x3b
+#endif
+
+#define PAGE_SIZE 4096
+#define VCPU_ID 5
+
+#define TSC_ADJUST_VALUE (1ll << 32)
+#define TSC_OFFSET_VALUE -(1ll << 48)
+
+enum {
+ PORT_ABORT = 0x1000,
+ PORT_REPORT,
+ PORT_DONE,
+};
+
+struct vmx_page {
+ vm_vaddr_t virt;
+ vm_paddr_t phys;
+};
+
+enum {
+ VMXON_PAGE = 0,
+ VMCS_PAGE,
+ MSR_BITMAP_PAGE,
+
+ NUM_VMX_PAGES,
+};
+
+struct kvm_single_msr {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+} __attribute__((packed));
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+/* Array of vmx_page descriptors that is shared with the guest. */
+struct vmx_page *vmx_pages;
+
+#define exit_to_l0(_port, _arg) do_exit_to_l0(_port, (unsigned long) (_arg))
+static void do_exit_to_l0(uint16_t port, unsigned long arg)
+{
+ __asm__ __volatile__("in %[port], %%al"
+ :
+ : [port]"d"(port), "D"(arg)
+ : "rax");
+}
+
+
+#define GUEST_ASSERT(_condition) do { \
+ if (!(_condition)) \
+ exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition); \
+} while (0)
+
+static void check_ia32_tsc_adjust(int64_t max)
+{
+ int64_t adjust;
+
+ adjust = rdmsr(MSR_IA32_TSC_ADJUST);
+ exit_to_l0(PORT_REPORT, adjust);
+ GUEST_ASSERT(adjust <= max);
+}
+
+static void l2_guest_code(void)
+{
+ uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
+
+ wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);
+ check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+ /* Exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_page *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+ uintptr_t save_cr3;
+
+ GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
+ wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
+ check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+
+ prepare_for_vmx_operation();
+
+ /* Enter VMX root operation. */
+ *(uint32_t *)vmx_pages[VMXON_PAGE].virt = vmcs_revision();
+ GUEST_ASSERT(!vmxon(vmx_pages[VMXON_PAGE].phys));
+
+ /* Load a VMCS. */
+ *(uint32_t *)vmx_pages[VMCS_PAGE].virt = vmcs_revision();
+ GUEST_ASSERT(!vmclear(vmx_pages[VMCS_PAGE].phys));
+ GUEST_ASSERT(!vmptrld(vmx_pages[VMCS_PAGE].phys));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ vmwrite(MSR_BITMAP, vmx_pages[MSR_BITMAP_PAGE].phys);
+ vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+ /* Jump into L2. First, test failure to load guest CR3. */
+ save_cr3 = vmreadz(GUEST_CR3);
+ vmwrite(GUEST_CR3, -1ull);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+ (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
+ check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+ vmwrite(GUEST_CR3, save_cr3);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+ exit_to_l0(PORT_DONE, 0);
+}
+
+static void allocate_vmx_page(struct vmx_page *page)
+{
+ vm_vaddr_t virt;
+
+ virt = vm_vaddr_alloc(vm, PAGE_SIZE, 0, 0, 0);
+ memset(addr_gva2hva(vm, virt), 0, PAGE_SIZE);
+
+ page->virt = virt;
+ page->phys = addr_gva2gpa(vm, virt);
+}
+
+static vm_vaddr_t allocate_vmx_pages(void)
+{
+ vm_vaddr_t vmx_pages_vaddr;
+ int i;
+
+ vmx_pages_vaddr = vm_vaddr_alloc(
+ vm, sizeof(struct vmx_page) * NUM_VMX_PAGES, 0, 0, 0);
+
+ vmx_pages = (void *) addr_gva2hva(vm, vmx_pages_vaddr);
+
+ for (i = 0; i < NUM_VMX_PAGES; i++)
+ allocate_vmx_page(&vmx_pages[i]);
+
+ return vmx_pages_vaddr;
+}
+
+void report(int64_t val)
+{
+ printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
+ val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_vaddr;
+ struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+ if (!(entry->ecx & CPUID_VMX)) {
+ fprintf(stderr, "nested VMX not enabled, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code);
+
+ /* Allocate VMX pages and shared descriptors (vmx_pages). */
+ vmx_pages_vaddr = allocate_vmx_pages();
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_vaddr);
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_regs regs;
+
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+
+ switch (run->io.port) {
+ case PORT_ABORT:
+ TEST_ASSERT(false, "%s", (const char *) regs.rdi);
+ /* NOT REACHED */
+ case PORT_REPORT:
+ report(regs.rdi);
+ break;
+ case PORT_DONE:
+ goto done;
+ default:
+ TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
+ }
+ }
+
+ kvm_vm_free(vm);
+done:
+ return 0;
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 7de482a0519d..6466294366dc 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -19,24 +19,43 @@ TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
.ONESHELL:
+define RUN_TEST_PRINT_RESULT
+ TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \
+ echo $$TEST_HDR_MSG; \
+ echo "========================================"; \
+ if [ ! -x $$TEST ]; then \
+ echo "$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.";\
+ echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
+ else \
+ cd `dirname $$TEST` > /dev/null; \
+ if [ "X$(summary)" != "X" ]; then \
+ (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && \
+ echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \
+ (if [ $$? -eq $$skip ]; then \
+ echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \
+ else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
+ fi;) \
+ else \
+ (./$$BASENAME_TEST && \
+ echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \
+ (if [ $$? -eq $$skip ]; then \
+ echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \
+ else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
+ fi;) \
+ fi; \
+ cd - > /dev/null; \
+ fi;
+endef
+
define RUN_TESTS
- @test_num=`echo 0`;
- @echo "TAP version 13";
- @for TEST in $(1); do \
+ @export KSFT_TAP_LEVEL=`echo 1`; \
+ test_num=`echo 0`; \
+ skip=`echo 4`; \
+ echo "TAP version 13"; \
+ for TEST in $(1); do \
BASENAME_TEST=`basename $$TEST`; \
test_num=`echo $$test_num+1 | bc`; \
- echo "selftests: $$BASENAME_TEST"; \
- echo "========================================"; \
- if [ ! -x $$TEST ]; then \
- echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\
- echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \
- else \
- if [ "X$(summary)" != "X" ]; then \
- cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
- else \
- cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
- fi; \
- fi; \
+ $(call RUN_TEST_PRINT_RESULT,$(TEST),$(BASENAME_TEST),$(test_num),$(skip)) \
done;
endef
@@ -75,9 +94,18 @@ else
endif
define EMIT_TESTS
- @for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
+ @test_num=`echo 0`; \
+ for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
BASENAME_TEST=`basename $$TEST`; \
- echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
+ test_num=`echo $$test_num+1 | bc`; \
+ TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \
+ echo "echo $$TEST_HDR_MSG"; \
+ if [ ! -x $$TEST ]; then \
+ echo "echo \"$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.\""; \
+ echo "echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\""; \
+ else
+ echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"ok 1..$$test_num $$TEST_HDR_MSG [PASS]\") || (if [ \$$? -eq \$$skip ]; then echo \"not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]\"; else echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\"; fi;)"; \
+ fi; \
done;
endef
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index 08360060ab14..70d5711e3ac8 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -3,6 +3,6 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
-TEST_PROGS := printf.sh bitmap.sh
+TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh
include ../lib.mk
diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh
index 4dee4d2a8bbe..5a90006d1aea 100755
--- a/tools/testing/selftests/lib/bitmap.sh
+++ b/tools/testing/selftests/lib/bitmap.sh
@@ -1,9 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
# Runs bitmap infrastructure tests using test_bitmap kernel module
if ! /sbin/modprobe -q -n test_bitmap; then
- echo "bitmap: [SKIP]"
- exit 77
+ echo "bitmap: module test_bitmap is not found [SKIP]"
+ exit $ksft_skip
fi
if /sbin/modprobe -q test_bitmap; then
diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
index b363994e5e11..78e7483c8d60 100755
--- a/tools/testing/selftests/lib/prime_numbers.sh
+++ b/tools/testing/selftests/lib/prime_numbers.sh
@@ -2,9 +2,12 @@
# SPDX-License-Identifier: GPL-2.0
# Checks fast/slow prime_number generation for inconsistencies
-if ! /sbin/modprobe -q -r prime_numbers; then
- echo "prime_numbers: [SKIP]"
- exit 77
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n prime_numbers; then
+ echo "prime_numbers: module prime_numbers is not found [SKIP]"
+ exit $ksft_skip
fi
if /sbin/modprobe -q prime_numbers selftest=65536; then
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
index 0c37377fd7d4..45a23e2d64ad 100755
--- a/tools/testing/selftests/lib/printf.sh
+++ b/tools/testing/selftests/lib/printf.sh
@@ -1,9 +1,13 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# Runs printf infrastructure using test_printf kernel module
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
if ! /sbin/modprobe -q -n test_printf; then
- echo "printf: [SKIP]"
- exit 77
+ echo "printf: module test_printf is not found [SKIP]"
+ exit $ksft_skip
fi
if /sbin/modprobe -q test_printf; then
diff --git a/tools/testing/selftests/locking/Makefile b/tools/testing/selftests/locking/Makefile
new file mode 100644
index 000000000000..6e7761ab3536
--- /dev/null
+++ b/tools/testing/selftests/locking/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for locking/ww_mutx selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := ww_mutex.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/locking/ww_mutex.sh b/tools/testing/selftests/locking/ww_mutex.sh
index 2c3d6b1878c2..91e4ac7566af 100644..100755
--- a/tools/testing/selftests/locking/ww_mutex.sh
+++ b/tools/testing/selftests/locking/ww_mutex.sh
@@ -1,6 +1,14 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
# Runs API tests for struct ww_mutex (Wait/Wound mutexes)
+if ! /sbin/modprobe -q -n test-ww_mutex; then
+ echo "ww_mutex: module test-ww_mutex is not found [SKIP]"
+ exit $ksft_skip
+fi
if /sbin/modprobe -q test-ww_mutex; then
/sbin/modprobe -q -r test-ww_mutex
diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile
index c82cec2497de..60826d7d37d4 100644
--- a/tools/testing/selftests/media_tests/Makefile
+++ b/tools/testing/selftests/media_tests/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
+#
+CFLAGS += -I../ -I../../../../usr/include/
TEST_GEN_PROGS := media_device_test media_device_open video_device_test
-all: $(TEST_GEN_PROGS)
include ../lib.mk
diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c
index a5ce5434bafd..93183a37b133 100644
--- a/tools/testing/selftests/media_tests/media_device_open.c
+++ b/tools/testing/selftests/media_tests/media_device_open.c
@@ -34,6 +34,8 @@
#include <sys/stat.h>
#include <linux/media.h>
+#include "../kselftest.h"
+
int main(int argc, char **argv)
{
int opt;
@@ -61,10 +63,8 @@ int main(int argc, char **argv)
}
}
- if (getuid() != 0) {
- printf("Please run the test as root - Exiting.\n");
- exit(-1);
- }
+ if (getuid() != 0)
+ ksft_exit_skip("Please run the test as root - Exiting.\n");
/* Open Media device and keep it open */
fd = open(media_device, O_RDWR);
diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c
index 421a367e4bb3..4b9953359e40 100644
--- a/tools/testing/selftests/media_tests/media_device_test.c
+++ b/tools/testing/selftests/media_tests/media_device_test.c
@@ -39,6 +39,8 @@
#include <time.h>
#include <linux/media.h>
+#include "../kselftest.h"
+
int main(int argc, char **argv)
{
int opt;
@@ -66,10 +68,8 @@ int main(int argc, char **argv)
}
}
- if (getuid() != 0) {
- printf("Please run the test as root - Exiting.\n");
- exit(-1);
- }
+ if (getuid() != 0)
+ ksft_exit_skip("Please run the test as root - Exiting.\n");
/* Generate random number of interations */
srand((unsigned int) time(NULL));
@@ -88,7 +88,7 @@ int main(int argc, char **argv)
"other Oops in the dmesg. Enable KaSan kernel\n"
"config option for use-after-free error detection.\n\n");
- printf("Running test for %d iternations\n", count);
+ printf("Running test for %d iterations\n", count);
while (count > 0) {
ret = ioctl(fd, MEDIA_IOC_DEVICE_INFO, &mdi);
diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c
index 22bffd55a523..6793f8ecc8e7 100644
--- a/tools/testing/selftests/membarrier/membarrier_test.c
+++ b/tools/testing/selftests/membarrier/membarrier_test.c
@@ -293,10 +293,9 @@ static int test_membarrier_query(void)
}
ksft_exit_fail_msg("sys_membarrier() failed\n");
}
- if (!(ret & MEMBARRIER_CMD_GLOBAL)) {
- ksft_test_result_fail("sys_membarrier() CMD_GLOBAL query failed\n");
- ksft_exit_fail_msg("sys_membarrier is not supported.\n");
- }
+ if (!(ret & MEMBARRIER_CMD_GLOBAL))
+ ksft_exit_skip(
+ "sys_membarrier unsupported: CMD_GLOBAL not found.\n");
ksft_test_result_pass("sys_membarrier available\n");
return 0;
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
index 0862e6f47a38..53a848109f7b 100644
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@@ -4,9 +4,9 @@ CFLAGS += -I../../../../include/uapi/
CFLAGS += -I../../../../include/
CFLAGS += -I../../../../usr/include/
-TEST_PROGS := run_tests.sh
-TEST_FILES := run_fuse_test.sh
-TEST_GEN_FILES := memfd_test fuse_mnt fuse_test
+TEST_GEN_PROGS := memfd_test
+TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh
+TEST_GEN_FILES := fuse_mnt fuse_test
fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh
index c2d41ed81b24..fb633eeb0290 100755
--- a/tools/testing/selftests/memfd/run_tests.sh
+++ b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh
@@ -1,11 +1,8 @@
#!/bin/bash
# please run as root
-#
-# Normal tests requiring no special resources
-#
-./run_fuse_test.sh
-./memfd_test
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
#
# To test memfd_create with hugetlbfs, there needs to be hpages_test
@@ -29,12 +26,13 @@ if [ -n "$freepgs" ] && [ $freepgs -lt $hpages_test ]; then
nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
hpages_needed=`expr $hpages_test - $freepgs`
+ if [ $UID != 0 ]; then
+ echo "Please run memfd with hugetlbfs test as root"
+ exit $ksft_skip
+ fi
+
echo 3 > /proc/sys/vm/drop_caches
echo $(( $hpages_needed + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
- if [ $? -ne 0 ]; then
- echo "Please run this test as root"
- exit 1
- fi
while read name size unit; do
if [ "$name" = "HugePages_Free:" ]; then
freepgs=$size
@@ -53,7 +51,7 @@ if [ $freepgs -lt $hpages_test ]; then
fi
printf "Not enough huge pages available (%d < %d)\n" \
$freepgs $needpgs
- exit 1
+ exit $ksft_skip
fi
#
diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile
index 686da510f989..e0a625e34f40 100644
--- a/tools/testing/selftests/memory-hotplug/Makefile
+++ b/tools/testing/selftests/memory-hotplug/Makefile
@@ -4,11 +4,8 @@ all:
include ../lib.mk
TEST_PROGS := mem-on-off-test.sh
-override RUN_TESTS := @./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]"
-
-override EMIT_TESTS := echo "$(subst @,,$(RUN_TESTS))"
run_full_test:
- @/bin/bash ./mem-on-off-test.sh && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]"
+ @/bin/bash ./mem-on-off-test.sh -r 10 && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]"
clean:
diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
index ae2c790d0880..b37585e6aa38 100755
--- a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
+++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
@@ -3,30 +3,33 @@
SYSFS=
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
prerequisite()
{
msg="skip all tests:"
if [ $UID != 0 ]; then
echo $msg must be run as root >&2
- exit 0
+ exit $ksft_skip
fi
SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
if [ ! -d "$SYSFS" ]; then
echo $msg sysfs is not mounted >&2
- exit 0
+ exit $ksft_skip
fi
if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then
echo $msg memory hotplug is not supported >&2
- exit 0
+ exit $ksft_skip
fi
if ! grep -q 1 $SYSFS/devices/system/memory/memory*/removable; then
echo $msg no hot-pluggable memory >&2
- exit 0
+ exit $ksft_skip
fi
}
@@ -133,7 +136,8 @@ offline_memory_expect_fail()
error=-12
priority=0
-ratio=10
+# Run with default of ratio=2 for Kselftest run
+ratio=2
retval=0
while getopts e:hp:r: opt; do
diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
index e094f71c6dbc..026890744215 100644
--- a/tools/testing/selftests/mount/Makefile
+++ b/tools/testing/selftests/mount/Makefile
@@ -3,15 +3,7 @@
CFLAGS = -Wall \
-O2
-TEST_GEN_PROGS := unprivileged-remount-test
+TEST_PROGS := run_tests.sh
+TEST_GEN_FILES := unprivileged-remount-test
include ../lib.mk
-
-override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \
- then \
- ./unprivileged-remount-test ; \
- else \
- echo "WARN: No /proc/self/uid_map exist, test skipped." ; \
- fi
-override EMIT_TESTS := echo "$(RUN_TESTS)"
-
diff --git a/tools/testing/selftests/mount/run_tests.sh b/tools/testing/selftests/mount/run_tests.sh
new file mode 100755
index 000000000000..4ab8f507dcba
--- /dev/null
+++ b/tools/testing/selftests/mount/run_tests.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Run mount selftests
+if [ -f /proc/self/uid_map ] ; then
+ ./unprivileged-remount-test ;
+else
+ echo "WARN: No /proc/self/uid_map exist, test skipped." ;
+ exit $ksft_skip
+fi
diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile
index 743d3f9e5918..8a58055fc1f5 100644
--- a/tools/testing/selftests/mqueue/Makefile
+++ b/tools/testing/selftests/mqueue/Makefile
@@ -1,17 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -O2
LDLIBS = -lrt -lpthread -lpopt
+
TEST_GEN_PROGS := mq_open_tests mq_perf_tests
include ../lib.mk
-
-override define RUN_TESTS
- @$(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
- @$(OUTPUT)/mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
-endef
-
-override define EMIT_TESTS
- echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\""
- echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\""
-endef
-
diff --git a/tools/testing/selftests/mqueue/mq_open_tests.c b/tools/testing/selftests/mqueue/mq_open_tests.c
index e0a74bd207a5..9403ac01ba11 100644
--- a/tools/testing/selftests/mqueue/mq_open_tests.c
+++ b/tools/testing/selftests/mqueue/mq_open_tests.c
@@ -33,6 +33,8 @@
#include <mqueue.h>
#include <error.h>
+#include "../kselftest.h"
+
static char *usage =
"Usage:\n"
" %s path\n"
@@ -53,6 +55,7 @@ int saved_def_msgs, saved_def_msgsize, saved_max_msgs, saved_max_msgsize;
int cur_def_msgs, cur_def_msgsize, cur_max_msgs, cur_max_msgsize;
FILE *def_msgs, *def_msgsize, *max_msgs, *max_msgsize;
char *queue_path;
+char *default_queue_path = "/test1";
mqd_t queue = -1;
static inline void __set(FILE *stream, int value, char *err_msg);
@@ -238,35 +241,33 @@ int main(int argc, char *argv[])
struct mq_attr attr, result;
if (argc != 2) {
- fprintf(stderr, "Must pass a valid queue name\n\n");
- fprintf(stderr, usage, argv[0]);
- exit(1);
- }
+ printf("Using Default queue path - %s\n", default_queue_path);
+ queue_path = default_queue_path;
+ } else {
/*
* Although we can create a msg queue with a non-absolute path name,
* unlink will fail. So, if the name doesn't start with a /, add one
* when we save it.
*/
- if (*argv[1] == '/')
- queue_path = strdup(argv[1]);
- else {
- queue_path = malloc(strlen(argv[1]) + 2);
- if (!queue_path) {
- perror("malloc()");
- exit(1);
+ if (*argv[1] == '/')
+ queue_path = strdup(argv[1]);
+ else {
+ queue_path = malloc(strlen(argv[1]) + 2);
+ if (!queue_path) {
+ perror("malloc()");
+ exit(1);
+ }
+ queue_path[0] = '/';
+ queue_path[1] = 0;
+ strcat(queue_path, argv[1]);
}
- queue_path[0] = '/';
- queue_path[1] = 0;
- strcat(queue_path, argv[1]);
}
- if (getuid() != 0) {
- fprintf(stderr, "Not running as root, but almost all tests "
+ if (getuid() != 0)
+ ksft_exit_skip("Not running as root, but almost all tests "
"require root in order to modify\nsystem settings. "
"Exiting.\n");
- exit(1);
- }
/* Find out what files there are for us to make tweaks in */
def_msgs = fopen(DEF_MSGS, "r+");
diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
index 8188f72de93c..b019e0b8221c 100644
--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -39,6 +39,8 @@
#include <popt.h>
#include <error.h>
+#include "../kselftest.h"
+
static char *usage =
"Usage:\n"
" %s [-c #[,#..] -f] path\n"
@@ -626,12 +628,10 @@ int main(int argc, char *argv[])
cpus_to_pin[0] = cpus_online - 1;
}
- if (getuid() != 0) {
- fprintf(stderr, "Not running as root, but almost all tests "
+ if (getuid() != 0)
+ ksft_exit_skip("Not running as root, but almost all tests "
"require root in order to modify\nsystem settings. "
"Exiting.\n");
- exit(1);
- }
max_msgs = fopen(MAX_MSGS, "r+");
max_msgsize = fopen(MAX_MSGSIZE, "r+");
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 785fc18a16b4..3ff81a478dbe 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -6,6 +6,7 @@ CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh
+TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 6a75a3ea44ad..7ba089b33e8b 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -7,3 +7,8 @@ CONFIG_NET_L3_MASTER_DEV=y
CONFIG_IPV6=y
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_VETH=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_NET_IPVTI=y
+CONFIG_INET6_XFRM_MODE_TUNNEL=y
+CONFIG_IPV6_VTI=y
+CONFIG_DUMMY=y
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 9164e60d4b66..5baac82b9287 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -5,6 +5,8 @@
# different events.
ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
VERBOSE=${VERBOSE:=0}
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
@@ -579,18 +581,18 @@ fib_test()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip;
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
ip route help 2>&1 | grep -q fibmatch
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing fibmatch"
- exit 0
+ exit $ksft_skip
fi
# start clean
diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
index 903679e0ff31..e3afcb424710 100755
--- a/tools/testing/selftests/net/netdevice.sh
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -8,6 +8,9 @@
# if not they probably have failed earlier in the boot process and their logged error will be catched by another test
#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
# this function will try to up the interface
# if already up, nothing done
# arg1: network interface name
@@ -18,7 +21,7 @@ kci_net_start()
ip link show "$netdev" |grep -q UP
if [ $? -eq 0 ];then
echo "SKIP: $netdev: interface already up"
- return 0
+ return $ksft_skip
fi
ip link set "$netdev" up
@@ -61,12 +64,12 @@ kci_net_setup()
ip address show "$netdev" |grep '^[[:space:]]*inet'
if [ $? -eq 0 ];then
echo "SKIP: $netdev: already have an IP"
- return 0
+ return $ksft_skip
fi
# TODO what ipaddr to set ? DHCP ?
echo "SKIP: $netdev: set IP address"
- return 0
+ return $ksft_skip
}
# test an ethtool command
@@ -84,6 +87,7 @@ kci_netdev_ethtool_test()
if [ $ret -ne 0 ];then
if [ $ret -eq "$1" ];then
echo "SKIP: $netdev: ethtool $2 not supported"
+ return $ksft_skip
else
echo "FAIL: $netdev: ethtool $2"
return 1
@@ -104,7 +108,7 @@ kci_netdev_ethtool()
ethtool --version 2>/dev/null >/dev/null
if [ $? -ne 0 ];then
echo "SKIP: ethtool not present"
- return 1
+ return $ksft_skip
fi
TMP_ETHTOOL_FEATURES="$(mktemp)"
@@ -176,13 +180,13 @@ kci_test_netdev()
#check for needed privileges
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
ip link show 2>/dev/null >/dev/null
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without the ip tool"
- exit 0
+ exit $ksft_skip
fi
TMP_LIST_NETDEV="$(mktemp)"
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 1e428781a625..7514f93e1624 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -43,6 +43,9 @@
# that MTU is properly calculated instead when MTU is not configured from
# userspace
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
tests="
pmtu_vti6_exception vti6: PMTU exceptions
pmtu_vti4_exception vti4: PMTU exceptions
@@ -162,7 +165,7 @@ setup_xfrm6() {
}
setup() {
- [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return 1
+ [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip
cleanup_done=0
for arg do
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
index 7f6cd9fdacf3..7ec4fa4d55dc 100644
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -60,6 +60,8 @@
#include "psock_lib.h"
+#include "../kselftest.h"
+
#ifndef bug_on
# define bug_on(cond) assert(!(cond))
#endif
@@ -825,7 +827,7 @@ static int test_tpacket(int version, int type)
fprintf(stderr, "test: skip %s %s since user and kernel "
"space have different bit width\n",
tpacket_str[version], type_str[type]);
- return 0;
+ return KSFT_SKIP;
}
sock = pfsocket(version);
diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c
index 365c32e84189..c9f478b40996 100644
--- a/tools/testing/selftests/net/reuseport_bpf_numa.c
+++ b/tools/testing/selftests/net/reuseport_bpf_numa.c
@@ -23,6 +23,8 @@
#include <unistd.h>
#include <numa.h>
+#include "../kselftest.h"
+
static const int PORT = 8888;
static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto)
@@ -229,7 +231,7 @@ int main(void)
int *rcv_fd, nodes;
if (numa_available() < 0)
- error(1, errno, "no numa api support");
+ ksft_exit_skip("no numa api support\n");
nodes = numa_max_node() + 1;
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index e6f485235435..fb3767844e42 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -7,6 +7,9 @@
devdummy="test-dummy0"
ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
# set global exit status, but never reset nonzero one.
check_err()
{
@@ -333,7 +336,7 @@ kci_test_vrf()
ip link show type vrf 2>/dev/null
if [ $? -ne 0 ]; then
echo "SKIP: vrf: iproute2 too old"
- return 0
+ return $ksft_skip
fi
ip link add "$vrfname" type vrf table 10
@@ -409,7 +412,7 @@ kci_test_encap_fou()
ip fou help 2>&1 |grep -q 'Usage: ip fou'
if [ $? -ne 0 ];then
echo "SKIP: fou: iproute2 too old"
- return 1
+ return $ksft_skip
fi
ip netns exec "$testns" ip fou add port 7777 ipproto 47 2>/dev/null
@@ -444,7 +447,7 @@ kci_test_encap()
ip netns add "$testns"
if [ $? -ne 0 ]; then
echo "SKIP encap tests: cannot add net namespace $testns"
- return 1
+ return $ksft_skip
fi
ip netns exec "$testns" ip link set lo up
@@ -469,7 +472,7 @@ kci_test_macsec()
ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
if [ $? -ne 0 ]; then
echo "SKIP: macsec: iproute2 too old"
- return 0
+ return $ksft_skip
fi
ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
@@ -511,14 +514,14 @@ kci_test_gretap()
ip netns add "$testns"
if [ $? -ne 0 ]; then
echo "SKIP gretap tests: cannot add net namespace $testns"
- return 1
+ return $ksft_skip
fi
ip link help gretap 2>&1 | grep -q "^Usage:"
if [ $? -ne 0 ];then
echo "SKIP: gretap: iproute2 too old"
ip netns del "$testns"
- return 1
+ return $ksft_skip
fi
# test native tunnel
@@ -561,14 +564,14 @@ kci_test_ip6gretap()
ip netns add "$testns"
if [ $? -ne 0 ]; then
echo "SKIP ip6gretap tests: cannot add net namespace $testns"
- return 1
+ return $ksft_skip
fi
ip link help ip6gretap 2>&1 | grep -q "^Usage:"
if [ $? -ne 0 ];then
echo "SKIP: ip6gretap: iproute2 too old"
ip netns del "$testns"
- return 1
+ return $ksft_skip
fi
# test native tunnel
@@ -611,13 +614,13 @@ kci_test_erspan()
ip link help erspan 2>&1 | grep -q "^Usage:"
if [ $? -ne 0 ];then
echo "SKIP: erspan: iproute2 too old"
- return 1
+ return $ksft_skip
fi
ip netns add "$testns"
if [ $? -ne 0 ]; then
echo "SKIP erspan tests: cannot add net namespace $testns"
- return 1
+ return $ksft_skip
fi
# test native tunnel erspan v1
@@ -676,13 +679,13 @@ kci_test_ip6erspan()
ip link help ip6erspan 2>&1 | grep -q "^Usage:"
if [ $? -ne 0 ];then
echo "SKIP: ip6erspan: iproute2 too old"
- return 1
+ return $ksft_skip
fi
ip netns add "$testns"
if [ $? -ne 0 ]; then
echo "SKIP ip6erspan tests: cannot add net namespace $testns"
- return 1
+ return $ksft_skip
fi
# test native tunnel ip6erspan v1
@@ -762,14 +765,14 @@ kci_test_rtnl()
#check for needed privileges
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
for x in ip tc;do
$x -Version 2>/dev/null >/dev/null
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without the $x tool"
- exit 0
+ exit $ksft_skip
fi
done
diff --git a/tools/testing/selftests/powerpc/benchmarks/.gitignore b/tools/testing/selftests/powerpc/benchmarks/.gitignore
index 04dc1e6ef2ce..9161679b1e1a 100644
--- a/tools/testing/selftests/powerpc/benchmarks/.gitignore
+++ b/tools/testing/selftests/powerpc/benchmarks/.gitignore
@@ -1,5 +1,7 @@
gettimeofday
context_switch
+fork
+exec_target
mmap_bench
futex_bench
null_syscall
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
index a35058e3766c..b4d7432a0ecd 100644
--- a/tools/testing/selftests/powerpc/benchmarks/Makefile
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := gettimeofday context_switch mmap_bench futex_bench null_syscall
+TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall
+TEST_GEN_FILES := exec_target
CFLAGS += -O2
@@ -10,3 +11,7 @@ $(TEST_GEN_PROGS): ../harness.c
$(OUTPUT)/context_switch: ../utils.c
$(OUTPUT)/context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec
$(OUTPUT)/context_switch: LDLIBS += -lpthread
+
+$(OUTPUT)/fork: LDLIBS += -lpthread
+
+$(OUTPUT)/exec_target: CFLAGS += -static -nostartfiles
diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
new file mode 100644
index 000000000000..3c9c144192be
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Part of fork context switch microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+void _exit(int);
+void _start(void)
+{
+ _exit(0);
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/fork.c b/tools/testing/selftests/powerpc/benchmarks/fork.c
new file mode 100644
index 000000000000..d312e638cb37
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/fork.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Context switch microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/shm.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static unsigned int timeout = 30;
+
+static void set_cpu(int cpu)
+{
+ cpu_set_t cpuset;
+
+ if (cpu == -1)
+ return;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) {
+ perror("sched_setaffinity");
+ exit(1);
+ }
+}
+
+static void start_process_on(void *(*fn)(void *), void *arg, int cpu)
+{
+ int pid;
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+
+ if (pid)
+ return;
+
+ set_cpu(cpu);
+
+ fn(arg);
+
+ exit(0);
+}
+
+static int cpu;
+static int do_fork = 0;
+static int do_vfork = 0;
+static int do_exec = 0;
+static char *exec_file;
+static int exec_target = 0;
+static unsigned long iterations;
+static unsigned long iterations_prev;
+
+static void run_exec(void)
+{
+ char *const argv[] = { "./exec_target", NULL };
+
+ if (execve("./exec_target", argv, NULL) == -1) {
+ perror("execve");
+ exit(1);
+ }
+}
+
+static void bench_fork(void)
+{
+ while (1) {
+ pid_t pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+ if (pid == 0) {
+ if (do_exec)
+ run_exec();
+ _exit(0);
+ }
+ pid = waitpid(pid, NULL, 0);
+ if (pid == -1) {
+ perror("waitpid");
+ exit(1);
+ }
+ iterations++;
+ }
+}
+
+static void bench_vfork(void)
+{
+ while (1) {
+ pid_t pid = vfork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+ if (pid == 0) {
+ if (do_exec)
+ run_exec();
+ _exit(0);
+ }
+ pid = waitpid(pid, NULL, 0);
+ if (pid == -1) {
+ perror("waitpid");
+ exit(1);
+ }
+ iterations++;
+ }
+}
+
+static void *null_fn(void *arg)
+{
+ pthread_exit(NULL);
+}
+
+static void bench_thread(void)
+{
+ pthread_t tid;
+ cpu_set_t cpuset;
+ pthread_attr_t attr;
+ int rc;
+
+ rc = pthread_attr_init(&attr);
+ if (rc) {
+ errno = rc;
+ perror("pthread_attr_init");
+ exit(1);
+ }
+
+ if (cpu != -1) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+ if (rc) {
+ errno = rc;
+ perror("pthread_attr_setaffinity_np");
+ exit(1);
+ }
+ }
+
+ while (1) {
+ rc = pthread_create(&tid, &attr, null_fn, NULL);
+ if (rc) {
+ errno = rc;
+ perror("pthread_create");
+ exit(1);
+ }
+ rc = pthread_join(tid, NULL);
+ if (rc) {
+ errno = rc;
+ perror("pthread_join");
+ exit(1);
+ }
+ iterations++;
+ }
+}
+
+static void sigalrm_handler(int junk)
+{
+ unsigned long i = iterations;
+
+ printf("%ld\n", i - iterations_prev);
+ iterations_prev = i;
+
+ if (--timeout == 0)
+ kill(0, SIGUSR1);
+
+ alarm(1);
+}
+
+static void sigusr1_handler(int junk)
+{
+ exit(0);
+}
+
+static void *bench_proc(void *arg)
+{
+ signal(SIGALRM, sigalrm_handler);
+ alarm(1);
+
+ if (do_fork)
+ bench_fork();
+ else if (do_vfork)
+ bench_vfork();
+ else
+ bench_thread();
+
+ return NULL;
+}
+
+static struct option options[] = {
+ { "fork", no_argument, &do_fork, 1 },
+ { "vfork", no_argument, &do_vfork, 1 },
+ { "exec", no_argument, &do_exec, 1 },
+ { "timeout", required_argument, 0, 's' },
+ { "exec-target", no_argument, &exec_target, 1 },
+ { NULL },
+};
+
+static void usage(void)
+{
+ fprintf(stderr, "Usage: fork <options> CPU\n\n");
+ fprintf(stderr, "\t\t--fork\tUse fork() (default threads)\n");
+ fprintf(stderr, "\t\t--vfork\tUse vfork() (default threads)\n");
+ fprintf(stderr, "\t\t--exec\tAlso exec() (default no exec)\n");
+ fprintf(stderr, "\t\t--timeout=X\tDuration in seconds to run (default 30)\n");
+ fprintf(stderr, "\t\t--exec-target\tInternal option for exec workload\n");
+}
+
+int main(int argc, char *argv[])
+{
+ signed char c;
+
+ while (1) {
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "", options, &option_index);
+
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 0:
+ if (options[option_index].flag != 0)
+ break;
+
+ usage();
+ exit(1);
+ break;
+
+ case 's':
+ timeout = atoi(optarg);
+ break;
+
+ default:
+ usage();
+ exit(1);
+ }
+ }
+
+ if (do_fork && do_vfork) {
+ usage();
+ exit(1);
+ }
+ if (do_exec && !do_fork && !do_vfork) {
+ usage();
+ exit(1);
+ }
+
+ if (do_exec) {
+ char *dirname = strdup(argv[0]);
+ int i;
+ i = strlen(dirname) - 1;
+ while (i) {
+ if (dirname[i] == '/') {
+ dirname[i] = '\0';
+ if (chdir(dirname) == -1) {
+ perror("chdir");
+ exit(1);
+ }
+ break;
+ }
+ i--;
+ }
+ }
+
+ if (exec_target) {
+ exit(0);
+ }
+
+ if (((argc - optind) != 1)) {
+ cpu = -1;
+ } else {
+ cpu = atoi(argv[optind++]);
+ }
+
+ if (do_exec)
+ exec_file = argv[0];
+
+ set_cpu(cpu);
+
+ printf("Using ");
+ if (do_fork)
+ printf("fork");
+ else if (do_vfork)
+ printf("vfork");
+ else
+ printf("clone");
+
+ if (do_exec)
+ printf(" + exec");
+
+ printf(" on cpu %d\n", cpu);
+
+ /* Create a new process group so we can signal everyone for exit */
+ setpgid(getpid(), getpid());
+
+ signal(SIGUSR1, sigusr1_handler);
+
+ start_process_on(bench_proc, NULL, cpu);
+
+ while (1)
+ sleep(3600);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index ac4a52e19e59..eedce3366f64 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -5,8 +5,8 @@ CFLAGS += -I$(CURDIR)
CFLAGS += -D SELFTEST
CFLAGS += -maltivec
-# Use our CFLAGS for the implicit .S rule
-ASFLAGS = $(CFLAGS)
+# Use our CFLAGS for the implicit .S rule & set the asm machine type
+ASFLAGS = $(CFLAGS) -Wa,-mpower4
TEST_GEN_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
EXTRA_SOURCES := validate.c ../harness.c
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 5c72ff978f27..c0e45d2dde25 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -4,7 +4,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
- $(SIGNAL_CONTEXT_CHK_TESTS)
+ $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn
include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
new file mode 100644
index 000000000000..85d63449243b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2015, Laurent Dufour, IBM Corp.
+ *
+ * Test the kernel's signal returning code to check reclaim is done if the
+ * sigreturn() is called while in a transaction (suspended since active is
+ * already dropped trough the system call path).
+ *
+ * The kernel must discard the transaction when entering sigreturn, since
+ * restoring the potential TM SPRS from the signal frame is requiring to not be
+ * in a transaction.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "tm.h"
+#include "utils.h"
+
+
+void handler(int sig)
+{
+ uint64_t ret;
+
+ asm __volatile__(
+ "li 3,1 ;"
+ "tbegin. ;"
+ "beq 1f ;"
+ "li 3,0 ;"
+ "tsuspend. ;"
+ "1: ;"
+ "std%X[ret] 3, %[ret] ;"
+ : [ret] "=m"(ret)
+ :
+ : "memory", "3", "cr0");
+
+ if (ret)
+ exit(1);
+
+ /*
+ * We return from the signal handle while in a suspended transaction
+ */
+}
+
+
+int tm_sigreturn(void)
+{
+ struct sigaction sa;
+ uint64_t ret = 0;
+
+ SKIP_IF(!have_htm());
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = handler;
+ sigemptyset(&sa.sa_mask);
+
+ if (sigaction(SIGSEGV, &sa, NULL))
+ exit(1);
+
+ asm __volatile__(
+ "tbegin. ;"
+ "beq 1f ;"
+ "li 3,0 ;"
+ "std 3,0(3) ;" /* trigger SEGV */
+ "li 3,1 ;"
+ "std%X[ret] 3,%[ret] ;"
+ "tend. ;"
+ "b 2f ;"
+ "1: ;"
+ "li 3,2 ;"
+ "std%X[ret] 3,%[ret] ;"
+ "2: ;"
+ : [ret] "=m"(ret)
+ :
+ : "memory", "3", "cr0");
+
+ if (ret != 2)
+ exit(1);
+
+ exit(0);
+}
+
+int main(void)
+{
+ return test_harness(tm_sigreturn, "tm_sigreturn");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index e6a0fad2bfd0..156c8e750259 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -80,7 +80,7 @@ bool is_failure(uint64_t condition_reg)
return ((condition_reg >> 28) & 0xa) == 0xa;
}
-void *ping(void *input)
+void *tm_una_ping(void *input)
{
/*
@@ -280,7 +280,7 @@ void *ping(void *input)
}
/* Thread to force context switch */
-void *pong(void *not_used)
+void *tm_una_pong(void *not_used)
{
/* Wait thread get its name "pong". */
if (DEBUG)
@@ -311,11 +311,11 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
do {
int rc;
- /* Bind 'ping' to CPU 0, as specified in 'attr'. */
- rc = pthread_create(&t0, attr, ping, (void *) &flags);
+ /* Bind to CPU 0, as specified in 'attr'. */
+ rc = pthread_create(&t0, attr, tm_una_ping, (void *) &flags);
if (rc)
pr_err(rc, "pthread_create()");
- rc = pthread_setname_np(t0, "ping");
+ rc = pthread_setname_np(t0, "tm_una_ping");
if (rc)
pr_warn(rc, "pthread_setname_np");
rc = pthread_join(t0, &ret_value);
@@ -333,13 +333,15 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
}
}
-int main(int argc, char **argv)
+int tm_unavailable_test(void)
{
int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
pthread_t t1;
pthread_attr_t attr;
cpu_set_t cpuset;
+ SKIP_IF(!have_htm());
+
/* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
CPU_ZERO(&cpuset);
CPU_SET(0, &cpuset);
@@ -354,12 +356,12 @@ int main(int argc, char **argv)
if (rc)
pr_err(rc, "pthread_attr_setaffinity_np()");
- rc = pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL);
+ rc = pthread_create(&t1, &attr /* Bind to CPU 0 */, tm_una_pong, NULL);
if (rc)
pr_err(rc, "pthread_create()");
/* Name it for systemtap convenience */
- rc = pthread_setname_np(t1, "pong");
+ rc = pthread_setname_np(t1, "tm_una_pong");
if (rc)
pr_warn(rc, "pthread_create()");
@@ -394,3 +396,9 @@ int main(int argc, char **argv)
exit(0);
}
}
+
+int main(int argc, char **argv)
+{
+ test_harness_set_timeout(220);
+ return test_harness(tm_unavailable_test, "tm_unavailable_test");
+}
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
new file mode 100644
index 000000000000..6c16f77c722c
--- /dev/null
+++ b/tools/testing/selftests/proc/.gitignore
@@ -0,0 +1,8 @@
+/proc-loadavg-001
+/proc-self-map-files-001
+/proc-self-map-files-002
+/proc-self-syscall
+/proc-self-wchan
+/proc-uptime-001
+/proc-uptime-002
+/read
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
new file mode 100644
index 000000000000..dbb87e56264c
--- /dev/null
+++ b/tools/testing/selftests/proc/Makefile
@@ -0,0 +1,13 @@
+CFLAGS += -Wall -O2
+
+TEST_GEN_PROGS :=
+TEST_GEN_PROGS += proc-loadavg-001
+TEST_GEN_PROGS += proc-self-map-files-001
+TEST_GEN_PROGS += proc-self-map-files-002
+TEST_GEN_PROGS += proc-self-syscall
+TEST_GEN_PROGS += proc-self-wchan
+TEST_GEN_PROGS += proc-uptime-001
+TEST_GEN_PROGS += proc-uptime-002
+TEST_GEN_PROGS += read
+
+include ../lib.mk
diff --git a/tools/testing/selftests/proc/config b/tools/testing/selftests/proc/config
new file mode 100644
index 000000000000..68fbd2b35884
--- /dev/null
+++ b/tools/testing/selftests/proc/config
@@ -0,0 +1 @@
+CONFIG_PROC_FS=y
diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c
new file mode 100644
index 000000000000..fcff7047000d
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-loadavg-001.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test that /proc/loadavg correctly reports last pid in pid namespace. */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+int main(void)
+{
+ pid_t pid;
+ int wstatus;
+
+ if (unshare(CLONE_NEWPID) == -1) {
+ if (errno == ENOSYS || errno == EPERM)
+ return 2;
+ return 1;
+ }
+
+ pid = fork();
+ if (pid == -1)
+ return 1;
+ if (pid == 0) {
+ char buf[128], *p;
+ int fd;
+ ssize_t rv;
+
+ fd = open("/proc/loadavg" , O_RDONLY);
+ if (fd == -1)
+ return 1;
+ rv = read(fd, buf, sizeof(buf));
+ if (rv < 3)
+ return 1;
+ p = buf + rv;
+
+ /* pid 1 */
+ if (!(p[-3] == ' ' && p[-2] == '1' && p[-1] == '\n'))
+ return 1;
+
+ pid = fork();
+ if (pid == -1)
+ return 1;
+ if (pid == 0)
+ return 0;
+ if (waitpid(pid, NULL, 0) == -1)
+ return 1;
+
+ lseek(fd, 0, SEEK_SET);
+ rv = read(fd, buf, sizeof(buf));
+ if (rv < 3)
+ return 1;
+ p = buf + rv;
+
+ /* pid 2 */
+ if (!(p[-3] == ' ' && p[-2] == '2' && p[-1] == '\n'))
+ return 1;
+
+ return 0;
+ }
+
+ if (waitpid(pid, &wstatus, 0) == -1)
+ return 1;
+ if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0)
+ return 0;
+ return 1;
+}
diff --git a/tools/testing/selftests/proc/proc-self-map-files-001.c b/tools/testing/selftests/proc/proc-self-map-files-001.c
new file mode 100644
index 000000000000..4209c64283d6
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-map-files-001.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+static void pass(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1)
+ exit(1);
+}
+
+static void fail(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT)
+ return;
+ exit(1);
+}
+
+int main(void)
+{
+ const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+ void *p;
+ int fd;
+ unsigned long a, b;
+
+ fd = open("/dev/zero", O_RDONLY);
+ if (fd == -1)
+ return 1;
+
+ p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE, fd, 0);
+ if (p == MAP_FAILED)
+ return 1;
+
+ a = (unsigned long)p;
+ b = (unsigned long)p + PAGE_SIZE;
+
+ pass("/proc/self/map_files/%lx-%lx", a, b);
+ fail("/proc/self/map_files/ %lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx -%lx", a, b);
+ fail("/proc/self/map_files/%lx- %lx", a, b);
+ fail("/proc/self/map_files/%lx-%lx ", a, b);
+ fail("/proc/self/map_files/0%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-0%lx", a, b);
+ if (sizeof(long) == 4) {
+ fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+ } else if (sizeof(long) == 8) {
+ fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+ } else
+ return 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c
new file mode 100644
index 000000000000..6f1f4a6e1ecb
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-map-files-002.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... with address 0. */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+static void pass(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1)
+ exit(1);
+}
+
+static void fail(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT)
+ return;
+ exit(1);
+}
+
+int main(void)
+{
+ const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+ void *p;
+ int fd;
+ unsigned long a, b;
+
+ fd = open("/dev/zero", O_RDONLY);
+ if (fd == -1)
+ return 1;
+
+ p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
+ if (p == MAP_FAILED) {
+ if (errno == EPERM)
+ return 2;
+ return 1;
+ }
+
+ a = (unsigned long)p;
+ b = (unsigned long)p + PAGE_SIZE;
+
+ pass("/proc/self/map_files/%lx-%lx", a, b);
+ fail("/proc/self/map_files/ %lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx -%lx", a, b);
+ fail("/proc/self/map_files/%lx- %lx", a, b);
+ fail("/proc/self/map_files/%lx-%lx ", a, b);
+ fail("/proc/self/map_files/0%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-0%lx", a, b);
+ if (sizeof(long) == 4) {
+ fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+ } else if (sizeof(long) == 8) {
+ fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+ } else
+ return 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c
new file mode 100644
index 000000000000..5ab5f4810e43
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-syscall.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+
+static inline ssize_t sys_read(int fd, void *buf, size_t len)
+{
+ return syscall(SYS_read, fd, buf, len);
+}
+
+int main(void)
+{
+ char buf1[64];
+ char buf2[64];
+ int fd;
+ ssize_t rv;
+
+ fd = open("/proc/self/syscall", O_RDONLY);
+ if (fd == -1) {
+ if (errno == ENOENT)
+ return 2;
+ return 1;
+ }
+
+ /* Do direct system call as libc can wrap anything. */
+ snprintf(buf1, sizeof(buf1), "%ld 0x%lx 0x%lx 0x%lx",
+ (long)SYS_read, (long)fd, (long)buf2, (long)sizeof(buf2));
+
+ memset(buf2, 0, sizeof(buf2));
+ rv = sys_read(fd, buf2, sizeof(buf2));
+ if (rv < 0)
+ return 1;
+ if (rv < strlen(buf1))
+ return 1;
+ if (strncmp(buf1, buf2, strlen(buf1)) != 0)
+ return 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-wchan.c b/tools/testing/selftests/proc/proc-self-wchan.c
new file mode 100644
index 000000000000..a38b2fbaa7ad
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-wchan.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+ char buf[64];
+ int fd;
+
+ fd = open("/proc/self/wchan", O_RDONLY);
+ if (fd == -1) {
+ if (errno == ENOENT)
+ return 2;
+ return 1;
+ }
+
+ buf[0] = '\0';
+ if (read(fd, buf, sizeof(buf)) != 1)
+ return 1;
+ if (buf[0] != '0')
+ return 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime-001.c b/tools/testing/selftests/proc/proc-uptime-001.c
new file mode 100644
index 000000000000..781f7a50fc3f
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime-001.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically.
+#undef NDEBUG
+#include <assert.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
+int main(void)
+{
+ uint64_t start, u0, u1, i0, i1;
+ int fd;
+
+ fd = open("/proc/uptime", O_RDONLY);
+ assert(fd >= 0);
+
+ proc_uptime(fd, &u0, &i0);
+ start = u0;
+ do {
+ proc_uptime(fd, &u1, &i1);
+ assert(u1 >= u0);
+ assert(i1 >= i0);
+ u0 = u1;
+ i0 = i1;
+ } while (u1 - start < 100);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c
new file mode 100644
index 000000000000..30e2b7849089
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime-002.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically
+// while shifting across CPUs.
+#define _GNU_SOURCE
+#undef NDEBUG
+#include <assert.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
+static inline int sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long *m)
+{
+ return syscall(SYS_sched_getaffinity, pid, len, m);
+}
+
+static inline int sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long *m)
+{
+ return syscall(SYS_sched_setaffinity, pid, len, m);
+}
+
+int main(void)
+{
+ unsigned int len;
+ unsigned long *m;
+ unsigned int cpu;
+ uint64_t u0, u1, i0, i1;
+ int fd;
+
+ /* find out "nr_cpu_ids" */
+ m = NULL;
+ len = 0;
+ do {
+ len += sizeof(unsigned long);
+ free(m);
+ m = malloc(len);
+ } while (sys_sched_getaffinity(0, len, m) == -EINVAL);
+
+ fd = open("/proc/uptime", O_RDONLY);
+ assert(fd >= 0);
+
+ proc_uptime(fd, &u0, &i0);
+ for (cpu = 0; cpu < len * 8; cpu++) {
+ memset(m, 0, len);
+ m[cpu / (8 * sizeof(unsigned long))] |= 1UL << (cpu % (8 * sizeof(unsigned long)));
+
+ /* CPU might not exist, ignore error */
+ sys_sched_setaffinity(0, len, m);
+
+ proc_uptime(fd, &u1, &i1);
+ assert(u1 >= u0);
+ assert(i1 >= i0);
+ u0 = u1;
+ i0 = i1;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h
new file mode 100644
index 000000000000..0e464b50e9d9
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static unsigned long long xstrtoull(const char *p, char **end)
+{
+ if (*p == '0') {
+ *end = (char *)p + 1;
+ return 0;
+ } else if ('1' <= *p && *p <= '9') {
+ unsigned long long val;
+
+ errno = 0;
+ val = strtoull(p, end, 10);
+ assert(errno == 0);
+ return val;
+ } else
+ assert(0);
+}
+
+static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle)
+{
+ uint64_t val1, val2;
+ char buf[64], *p;
+ ssize_t rv;
+
+ /* save "p < end" checks */
+ memset(buf, 0, sizeof(buf));
+ rv = pread(fd, buf, sizeof(buf), 0);
+ assert(0 <= rv && rv <= sizeof(buf));
+ buf[sizeof(buf) - 1] = '\0';
+
+ p = buf;
+
+ val1 = xstrtoull(p, &p);
+ assert(p[0] == '.');
+ assert('0' <= p[1] && p[1] <= '9');
+ assert('0' <= p[2] && p[2] <= '9');
+ assert(p[3] == ' ');
+
+ val2 = (p[1] - '0') * 10 + p[2] - '0';
+ *uptime = val1 * 100 + val2;
+
+ p += 4;
+
+ val1 = xstrtoull(p, &p);
+ assert(p[0] == '.');
+ assert('0' <= p[1] && p[1] <= '9');
+ assert('0' <= p[2] && p[2] <= '9');
+ assert(p[3] == '\n');
+
+ val2 = (p[1] - '0') * 10 + p[2] - '0';
+ *idle = val1 * 100 + val2;
+
+ assert(p + 4 == buf + rv);
+}
diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c
new file mode 100644
index 000000000000..1e73c2232097
--- /dev/null
+++ b/tools/testing/selftests/proc/read.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test
+// 1) read of every file in /proc
+// 2) readlink of every symlink in /proc
+// 3) recursively (1) + (2) for every directory in /proc
+// 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs
+// 5) write to /proc/sysrq-trigger
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static inline bool streq(const char *s1, const char *s2)
+{
+ return strcmp(s1, s2) == 0;
+}
+
+static struct dirent *xreaddir(DIR *d)
+{
+ struct dirent *de;
+
+ errno = 0;
+ de = readdir(d);
+ if (!de && errno != 0) {
+ exit(1);
+ }
+ return de;
+}
+
+static void f_reg(DIR *d, const char *filename)
+{
+ char buf[4096];
+ int fd;
+ ssize_t rv;
+
+ /* read from /proc/kmsg can block */
+ fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK);
+ if (fd == -1)
+ return;
+ rv = read(fd, buf, sizeof(buf));
+ assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
+ close(fd);
+}
+
+static void f_reg_write(DIR *d, const char *filename, const char *buf, size_t len)
+{
+ int fd;
+ ssize_t rv;
+
+ fd = openat(dirfd(d), filename, O_WRONLY);
+ if (fd == -1)
+ return;
+ rv = write(fd, buf, len);
+ assert((0 <= rv && rv <= len) || rv == -1);
+ close(fd);
+}
+
+static void f_lnk(DIR *d, const char *filename)
+{
+ char buf[4096];
+ ssize_t rv;
+
+ rv = readlinkat(dirfd(d), filename, buf, sizeof(buf));
+ assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
+}
+
+static void f(DIR *d, unsigned int level)
+{
+ struct dirent *de;
+
+ de = xreaddir(d);
+ assert(de->d_type == DT_DIR);
+ assert(streq(de->d_name, "."));
+
+ de = xreaddir(d);
+ assert(de->d_type == DT_DIR);
+ assert(streq(de->d_name, ".."));
+
+ while ((de = xreaddir(d))) {
+ assert(!streq(de->d_name, "."));
+ assert(!streq(de->d_name, ".."));
+
+ switch (de->d_type) {
+ DIR *dd;
+ int fd;
+
+ case DT_REG:
+ if (level == 0 && streq(de->d_name, "sysrq-trigger")) {
+ f_reg_write(d, de->d_name, "h", 1);
+ } else if (level == 1 && streq(de->d_name, "clear_refs")) {
+ f_reg_write(d, de->d_name, "1", 1);
+ } else if (level == 3 && streq(de->d_name, "clear_refs")) {
+ f_reg_write(d, de->d_name, "1", 1);
+ } else {
+ f_reg(d, de->d_name);
+ }
+ break;
+ case DT_DIR:
+ fd = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY);
+ if (fd == -1)
+ continue;
+ dd = fdopendir(fd);
+ if (!dd)
+ continue;
+ f(dd, level + 1);
+ closedir(dd);
+ break;
+ case DT_LNK:
+ f_lnk(d, de->d_name);
+ break;
+ default:
+ assert(0);
+ }
+ }
+}
+
+int main(void)
+{
+ DIR *d;
+
+ d = opendir("/proc");
+ if (!d)
+ return 2;
+ f(d, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
new file mode 100755
index 000000000000..98f650c9bf54
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+#
+# Invoke a text editor on all console.log files for all runs with diagnostics,
+# that is, on all such files having a console.log.diags counterpart.
+# Note that both console.log.diags and console.log are passed to the
+# editor (currently defaulting to "vi"), allowing the user to get an
+# idea of what to search for in the console.log file.
+#
+# Usage: kvm-find-errors.sh directory
+#
+# The "directory" above should end with the date/time directory, for example,
+# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
+
+rundir="${1}"
+if test -z "$rundir" -o ! -d "$rundir"
+then
+ echo Usage: $0 directory
+fi
+editor=${EDITOR-vi}
+
+# Find builds with errors
+files=
+for i in ${rundir}/*/Make.out
+do
+ if egrep -q "error:|warning:" < $i
+ then
+ egrep "error:|warning:" < $i > $i.diags
+ files="$files $i.diags $i"
+ fi
+done
+if test -n "$files"
+then
+ $editor $files
+else
+ echo No build errors.
+fi
+if grep -q -e "--buildonly" < ${rundir}/log
+then
+ echo Build-only run, no console logs to check.
+fi
+
+# Find console logs with errors
+files=
+for i in ${rundir}/*/console.log
+do
+ if test -r $i.diags
+ then
+ files="$files $i.diags $i"
+ fi
+done
+if test -n "$files"
+then
+ $editor $files
+else
+ echo No errors in console logs.
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index c2e1bb6d0cba..477ecb1293ab 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -34,11 +34,15 @@ fi
configfile=`echo $i | sed -e 's/^.*\///'`
ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'`
+stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
+ tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
+ awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
+ tr -d '\012\015'`"
if test -z "$ngps"
then
- echo "$configfile -------"
+ echo "$configfile ------- " $stopstate
else
- title="$configfile ------- $ngps grace periods"
+ title="$configfile ------- $ngps GPs"
dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
if test -z "$dur"
then
@@ -46,9 +50,9 @@ else
else
ngpsps=`awk -v ngps=$ngps -v dur=$dur '
BEGIN { print ngps / dur }' < /dev/null`
- title="$title ($ngpsps per second)"
+ title="$title ($ngpsps/s)"
fi
- echo $title
+ echo $title $stopstate
nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
if test -z "$nclosecalls"
then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index f7e988f369dd..c27e97824163 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -48,10 +48,6 @@ do
cat $i/Make.oldconfig.err
fi
parse-build.sh $i/Make.out $configfile
- if test "$TORTURE_SUITE" != rcuperf
- then
- parse-torture.sh $i/console.log $configfile
- fi
parse-console.sh $i/console.log $configfile
if test -r $i/Warnings
then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 5f8fbb0d7c17..c5b0f94341d9 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -267,5 +267,4 @@ then
echo Unknown PID, cannot kill qemu command
fi
-parse-torture.sh $resdir/console.log $title
parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 08aa7d50ae0e..17293436f551 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -24,57 +24,146 @@
#
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+T=${TMPDIR-/tmp}/parse-console.sh.$$
file="$1"
title="$2"
+trap 'rm -f $T.seq $T.diags' 0
+
. functions.sh
+# Check for presence and readability of console output file
+if test -f "$file" -a -r "$file"
+then
+ :
+else
+ echo $title unreadable console output file: $file
+ exit 1
+fi
if grep -Pq '\x00' < $file
then
print_warning Console output contains nul bytes, old qemu still running?
fi
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags
-if test -s $1.diags
+cat /dev/null > $file.diags
+
+# Check for proper termination, except that rcuperf runs don't indicate this.
+if test "$TORTURE_SUITE" != rcuperf
then
- print_warning Assertion failure in $file $title
- # cat $1.diags
+ # check for abject failure
+
+ if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
+ then
+ nerrs=`grep --binary-files=text '!!!' $file |
+ tail -1 |
+ awk '
+ {
+ for (i=NF-8;i<=NF;i++)
+ sum+=$i;
+ }
+ END { print sum }'`
+ print_bug $title FAILURE, $nerrs instances
+ exit
+ fi
+
+ grep --binary-files=text 'torture:.*ver:' $file |
+ egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
+ sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
+ awk '
+ BEGIN {
+ ver = 0;
+ badseq = 0;
+ }
+
+ {
+ if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
+ badseqno1 = ver;
+ badseqno2 = $5;
+ badseqnr = NR;
+ badseq = 1;
+ }
+ ver = $5
+ }
+
+ END {
+ if (badseq) {
+ if (badseqno1 == badseqno2 && badseqno2 == ver)
+ print "GP HANG at " ver " torture stat " badseqnr;
+ else
+ print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
+ }
+ }' > $T.seq
+
+ if grep -q SUCCESS $file
+ then
+ if test -s $T.seq
+ then
+ print_warning $title `cat $T.seq`
+ echo " " $file
+ exit 2
+ fi
+ else
+ if grep -q "_HOTPLUG:" $file
+ then
+ print_warning HOTPLUG FAILURES $title `cat $T.seq`
+ echo " " $file
+ exit 3
+ fi
+ echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
+ if test -s $T.seq
+ then
+ print_warning $title `cat $T.seq`
+ fi
+ exit 2
+ fi
+fi | tee -a $file.diags
+
+egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
+grep -v 'ODEBUG: ' |
+grep -v 'Warning: unable to open an initial console' > $T.diags
+if test -s $T.diags
+then
+ print_warning "Assertion failure in $file $title"
+ # cat $T.diags
summary=""
- n_badness=`grep -c Badness $1`
+ n_badness=`grep -c Badness $file`
if test "$n_badness" -ne 0
then
summary="$summary Badness: $n_badness"
fi
- n_warn=`grep -v 'Warning: unable to open an initial console' $1 | egrep -c 'WARNING:|Warn'`
+ n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'`
if test "$n_warn" -ne 0
then
summary="$summary Warnings: $n_warn"
fi
- n_bugs=`egrep -c 'BUG|Oops:' $1`
+ n_bugs=`egrep -c 'BUG|Oops:' $file`
if test "$n_bugs" -ne 0
then
summary="$summary Bugs: $n_bugs"
fi
- n_calltrace=`grep -c 'Call Trace:' $1`
+ n_calltrace=`grep -c 'Call Trace:' $file`
if test "$n_calltrace" -ne 0
then
summary="$summary Call Traces: $n_calltrace"
fi
- n_lockdep=`grep -c =========== $1`
+ n_lockdep=`grep -c =========== $file`
if test "$n_badness" -ne 0
then
summary="$summary lockdep: $n_badness"
fi
- n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1`
+ n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
if test "$n_stalls" -ne 0
then
summary="$summary Stalls: $n_stalls"
fi
- n_starves=`grep -c 'rcu_.*kthread starved for' $1`
+ n_starves=`grep -c 'rcu_.*kthread starved for' $file`
if test "$n_starves" -ne 0
then
summary="$summary Starves: $n_starves"
fi
print_warning Summary: $summary
-else
- rm $1.diags
+ cat $T.diags >> $file.diags
+fi
+if ! test -s $file.diags
+then
+ rm -f $file.diags
fi
diff --git a/tools/testing/selftests/rcutorture/bin/parse-torture.sh b/tools/testing/selftests/rcutorture/bin/parse-torture.sh
deleted file mode 100755
index 5987e50cfeb4..000000000000
--- a/tools/testing/selftests/rcutorture/bin/parse-torture.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/bin/bash
-#
-# Check the console output from a torture run for goodness.
-# The "file" is a pathname on the local system, and "title" is
-# a text string for error-message purposes.
-#
-# The file must contain torture output, but can be interspersed
-# with other dmesg text, as in console-log output.
-#
-# Usage: parse-torture.sh file title
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, you can access it online at
-# http://www.gnu.org/licenses/gpl-2.0.html.
-#
-# Copyright (C) IBM Corporation, 2011
-#
-# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-
-T=${TMPDIR-/tmp}/parse-torture.sh.$$
-file="$1"
-title="$2"
-
-trap 'rm -f $T.seq' 0
-
-. functions.sh
-
-# check for presence of torture output file.
-
-if test -f "$file" -a -r "$file"
-then
- :
-else
- echo $title unreadable torture output file: $file
- exit 1
-fi
-
-# check for abject failure
-
-if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
-then
- nerrs=`grep --binary-files=text '!!!' $file | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
- print_bug $title FAILURE, $nerrs instances
- echo " " $url
- exit
-fi
-
-grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
-awk '
-BEGIN {
- ver = 0;
- badseq = 0;
- }
-
- {
- if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
- badseqno1 = ver;
- badseqno2 = $5;
- badseqnr = NR;
- badseq = 1;
- }
- ver = $5
- }
-
-END {
- if (badseq) {
- if (badseqno1 == badseqno2 && badseqno2 == ver)
- print "GP HANG at " ver " torture stat " badseqnr;
- else
- print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
- }
- }' > $T.seq
-
-if grep -q SUCCESS $file
-then
- if test -s $T.seq
- then
- print_warning $title $title `cat $T.seq`
- echo " " $file
- exit 2
- fi
-else
- if grep -q "_HOTPLUG:" $file
- then
- print_warning HOTPLUG FAILURES $title `cat $T.seq`
- echo " " $file
- exit 3
- fi
- echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
- if test -s $T.seq
- then
- print_warning $title `cat $T.seq`
- fi
- exit 2
-fi
diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore
new file mode 100644
index 000000000000..d0ad44f6294a
--- /dev/null
+++ b/tools/testing/selftests/rtc/.gitignore
@@ -0,0 +1,2 @@
+rtctest
+setdate
diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile
new file mode 100644
index 000000000000..de9c8566672a
--- /dev/null
+++ b/tools/testing/selftests/rtc/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O3 -Wl,-no-as-needed -Wall
+LDFLAGS += -lrt -lpthread -lm
+
+TEST_GEN_PROGS = rtctest
+
+TEST_GEN_PROGS_EXTENDED = setdate
+
+include ../lib.mk
diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c
new file mode 100644
index 000000000000..e20b017e7073
--- /dev/null
+++ b/tools/testing/selftests/rtc/rtctest.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Real Time Clock Driver Test Program
+ *
+ * Copyright (c) 2018 Alexandre Belloni <alexandre.belloni@bootlin.com>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/rtc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+#define NUM_UIE 3
+#define ALARM_DELTA 3
+
+static char *rtc_file = "/dev/rtc0";
+
+FIXTURE(rtc) {
+ int fd;
+};
+
+FIXTURE_SETUP(rtc) {
+ self->fd = open(rtc_file, O_RDONLY);
+ ASSERT_NE(-1, self->fd);
+}
+
+FIXTURE_TEARDOWN(rtc) {
+ close(self->fd);
+}
+
+TEST_F(rtc, date_read) {
+ int rc;
+ struct rtc_time rtc_tm;
+
+ /* Read the RTC time/date */
+ rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm);
+ ASSERT_NE(-1, rc);
+
+ TH_LOG("Current RTC date/time is %02d/%02d/%02d %02d:%02d:%02d.",
+ rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
+ rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
+}
+
+TEST_F(rtc, uie_read) {
+ int i, rc, irq = 0;
+ unsigned long data;
+
+ /* Turn on update interrupts */
+ rc = ioctl(self->fd, RTC_UIE_ON, 0);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+ TH_LOG("skip update IRQs not supported.");
+ return;
+ }
+
+ for (i = 0; i < NUM_UIE; i++) {
+ /* This read will block */
+ rc = read(self->fd, &data, sizeof(data));
+ ASSERT_NE(-1, rc);
+ irq++;
+ }
+
+ EXPECT_EQ(NUM_UIE, irq);
+
+ rc = ioctl(self->fd, RTC_UIE_OFF, 0);
+ ASSERT_NE(-1, rc);
+}
+
+TEST_F(rtc, uie_select) {
+ int i, rc, irq = 0;
+ unsigned long data;
+
+ /* Turn on update interrupts */
+ rc = ioctl(self->fd, RTC_UIE_ON, 0);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+ TH_LOG("skip update IRQs not supported.");
+ return;
+ }
+
+ for (i = 0; i < NUM_UIE; i++) {
+ struct timeval tv = { .tv_sec = 2 };
+ fd_set readfds;
+
+ FD_ZERO(&readfds);
+ FD_SET(self->fd, &readfds);
+ /* The select will wait until an RTC interrupt happens. */
+ rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+ ASSERT_NE(-1, rc);
+ ASSERT_NE(0, rc);
+
+ /* This read won't block */
+ rc = read(self->fd, &data, sizeof(unsigned long));
+ ASSERT_NE(-1, rc);
+ irq++;
+ }
+
+ EXPECT_EQ(NUM_UIE, irq);
+
+ rc = ioctl(self->fd, RTC_UIE_OFF, 0);
+ ASSERT_NE(-1, rc);
+}
+
+TEST_F(rtc, alarm_alm_set) {
+ struct timeval tv = { .tv_sec = ALARM_DELTA + 2 };
+ unsigned long data;
+ struct rtc_time tm;
+ fd_set readfds;
+ time_t secs, new;
+ int rc;
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+ ASSERT_NE(-1, rc);
+
+ secs = timegm((struct tm *)&tm) + ALARM_DELTA;
+ gmtime_r(&secs, (struct tm *)&tm);
+
+ rc = ioctl(self->fd, RTC_ALM_SET, &tm);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+ TH_LOG("skip alarms are not supported.");
+ return;
+ }
+
+ rc = ioctl(self->fd, RTC_ALM_READ, &tm);
+ ASSERT_NE(-1, rc);
+
+ TH_LOG("Alarm time now set to %02d:%02d:%02d.",
+ tm.tm_hour, tm.tm_min, tm.tm_sec);
+
+ /* Enable alarm interrupts */
+ rc = ioctl(self->fd, RTC_AIE_ON, 0);
+ ASSERT_NE(-1, rc);
+
+ FD_ZERO(&readfds);
+ FD_SET(self->fd, &readfds);
+
+ rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+ ASSERT_NE(-1, rc);
+ EXPECT_NE(0, rc);
+
+ /* Disable alarm interrupts */
+ rc = ioctl(self->fd, RTC_AIE_OFF, 0);
+ ASSERT_NE(-1, rc);
+
+ if (rc == 0)
+ return;
+
+ rc = read(self->fd, &data, sizeof(unsigned long));
+ ASSERT_NE(-1, rc);
+ TH_LOG("data: %lx", data);
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+ ASSERT_NE(-1, rc);
+
+ new = timegm((struct tm *)&tm);
+ ASSERT_EQ(new, secs);
+}
+
+TEST_F(rtc, alarm_wkalm_set) {
+ struct timeval tv = { .tv_sec = ALARM_DELTA + 2 };
+ struct rtc_wkalrm alarm = { 0 };
+ struct rtc_time tm;
+ unsigned long data;
+ fd_set readfds;
+ time_t secs, new;
+ int rc;
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time);
+ ASSERT_NE(-1, rc);
+
+ secs = timegm((struct tm *)&alarm.time) + ALARM_DELTA;
+ gmtime_r(&secs, (struct tm *)&alarm.time);
+
+ alarm.enabled = 1;
+
+ rc = ioctl(self->fd, RTC_WKALM_SET, &alarm);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+ TH_LOG("skip alarms are not supported.");
+ return;
+ }
+
+ rc = ioctl(self->fd, RTC_WKALM_RD, &alarm);
+ ASSERT_NE(-1, rc);
+
+ TH_LOG("Alarm time now set to %02d/%02d/%02d %02d:%02d:%02d.",
+ alarm.time.tm_mday, alarm.time.tm_mon + 1,
+ alarm.time.tm_year + 1900, alarm.time.tm_hour,
+ alarm.time.tm_min, alarm.time.tm_sec);
+
+ FD_ZERO(&readfds);
+ FD_SET(self->fd, &readfds);
+
+ rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+ ASSERT_NE(-1, rc);
+ EXPECT_NE(0, rc);
+
+ rc = read(self->fd, &data, sizeof(unsigned long));
+ ASSERT_NE(-1, rc);
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+ ASSERT_NE(-1, rc);
+
+ new = timegm((struct tm *)&tm);
+ ASSERT_EQ(new, secs);
+}
+
+static void __attribute__((constructor))
+__constructor_order_last(void)
+{
+ if (!__constructor_order)
+ __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD;
+}
+
+int main(int argc, char **argv)
+{
+ switch (argc) {
+ case 2:
+ rtc_file = argv[1];
+ /* FALLTHROUGH */
+ case 1:
+ break;
+ default:
+ fprintf(stderr, "usage: %s [rtcdev]\n", argv[0]);
+ return 1;
+ }
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/timers/rtctest_setdate.c b/tools/testing/selftests/rtc/setdate.c
index 2cb78489eca4..2cb78489eca4 100644
--- a/tools/testing/selftests/timers/rtctest_setdate.c
+++ b/tools/testing/selftests/rtc/setdate.c
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 5df609950a66..e1473234968d 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -134,11 +134,15 @@ struct seccomp_data {
#endif
#ifndef SECCOMP_FILTER_FLAG_TSYNC
-#define SECCOMP_FILTER_FLAG_TSYNC 1
+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
#endif
#ifndef SECCOMP_FILTER_FLAG_LOG
-#define SECCOMP_FILTER_FLAG_LOG 2
+#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#endif
#ifndef PTRACE_SECCOMP_GET_METADATA
@@ -2072,14 +2076,26 @@ TEST(seccomp_syscall_mode_lock)
TEST(detect_seccomp_filter_flags)
{
unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
- SECCOMP_FILTER_FLAG_LOG };
+ SECCOMP_FILTER_FLAG_LOG,
+ SECCOMP_FILTER_FLAG_SPEC_ALLOW };
unsigned int flag, all_flags;
int i;
long ret;
/* Test detection of known-good filter flags */
for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
+ int bits = 0;
+
flag = flags[i];
+ /* Make sure the flag is a single bit! */
+ while (flag) {
+ if (flag & 0x1)
+ bits ++;
+ flag >>= 1;
+ }
+ ASSERT_EQ(1, bits);
+ flag = flags[i];
+
ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
ASSERT_NE(ENOSYS, errno) {
TH_LOG("Kernel does not support seccomp syscall!");
@@ -2860,6 +2876,7 @@ TEST(get_metadata)
int pipefd[2];
char buf;
struct seccomp_metadata md;
+ long ret;
ASSERT_EQ(0, pipe(pipefd));
@@ -2893,16 +2910,26 @@ TEST(get_metadata)
ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
ASSERT_EQ(pid, waitpid(pid, NULL, 0));
+ /* Past here must not use ASSERT or child process is never killed. */
+
md.filter_off = 0;
- ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
+ errno = 0;
+ ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
+ EXPECT_EQ(sizeof(md), ret) {
+ if (errno == EINVAL)
+ XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
+ }
+
EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
EXPECT_EQ(md.filter_off, 0);
md.filter_off = 1;
- ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md));
+ ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
+ EXPECT_EQ(sizeof(md), ret);
EXPECT_EQ(md.flags, 0);
EXPECT_EQ(md.filter_off, 1);
+skip:
ASSERT_EQ(0, kill(pid, SIGKILL));
}
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index 5b012f4981d4..6f289a49e5ec 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -66,7 +66,7 @@
"cmdUnderTest": "$TC action add action bpf object-file _b.o index 667",
"expExitCode": "0",
"verifyCmd": "$TC action get action bpf index 667",
- "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c default-action pipe.*index 667 ref",
+ "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref",
"matchCount": "1",
"teardown": [
"$TC action flush action bpf",
@@ -92,10 +92,15 @@
"cmdUnderTest": "$TC action add action bpf object-file _c.o index 667",
"expExitCode": "255",
"verifyCmd": "$TC action get action bpf index 667",
- "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9].*index 667 ref",
+ "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref",
"matchCount": "0",
"teardown": [
- "$TC action flush action bpf",
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
"rm -f _c.o"
]
},
diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore
index 2c8ac8416299..32a9eadb2d4e 100644
--- a/tools/testing/selftests/timers/.gitignore
+++ b/tools/testing/selftests/timers/.gitignore
@@ -9,7 +9,7 @@ nanosleep
nsleep-lat
posix_timers
raw_skew
-rtctest
+rtcpie
set-2038
set-tai
set-timer-lat
@@ -19,4 +19,3 @@ valid-adjtimex
adjtick
set-tz
freq-step
-rtctest_setdate
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index 3496680981f2..c02683cfb6c9 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -5,13 +5,13 @@ LDFLAGS += -lrt -lpthread -lm
# these are all "safe" tests that don't modify
# system time or require escalated privileges
TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
- inconsistency-check raw_skew threadtest rtctest
+ inconsistency-check raw_skew threadtest rtcpie
DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \
skew_consistency clocksource-switch freq-step leap-a-day \
leapcrash set-tai set-2038 set-tz
-TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS) rtctest_setdate
+TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS)
include ../lib.mk
diff --git a/tools/testing/selftests/timers/rtcpie.c b/tools/testing/selftests/timers/rtcpie.c
new file mode 100644
index 000000000000..47b5bad1b393
--- /dev/null
+++ b/tools/testing/selftests/timers/rtcpie.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Real Time Clock Periodic Interrupt test program
+ *
+ * Since commit 6610e0893b8bc ("RTC: Rework RTC code to use timerqueue for
+ * events"), PIE are completely handled using hrtimers, without actually using
+ * any underlying hardware RTC.
+ *
+ */
+
+#include <stdio.h>
+#include <linux/rtc.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+
+/*
+ * This expects the new RTC class driver framework, working with
+ * clocks that will often not be clones of what the PC-AT had.
+ * Use the command line to specify another RTC if you need one.
+ */
+static const char default_rtc[] = "/dev/rtc0";
+
+int main(int argc, char **argv)
+{
+ int i, fd, retval, irqcount = 0;
+ unsigned long tmp, data, old_pie_rate;
+ const char *rtc = default_rtc;
+ struct timeval start, end, diff;
+
+ switch (argc) {
+ case 2:
+ rtc = argv[1];
+ /* FALLTHROUGH */
+ case 1:
+ break;
+ default:
+ fprintf(stderr, "usage: rtctest [rtcdev] [d]\n");
+ return 1;
+ }
+
+ fd = open(rtc, O_RDONLY);
+
+ if (fd == -1) {
+ perror(rtc);
+ exit(errno);
+ }
+
+ /* Read periodic IRQ rate */
+ retval = ioctl(fd, RTC_IRQP_READ, &old_pie_rate);
+ if (retval == -1) {
+ /* not all RTCs support periodic IRQs */
+ if (errno == EINVAL) {
+ fprintf(stderr, "\nNo periodic IRQ support\n");
+ goto done;
+ }
+ perror("RTC_IRQP_READ ioctl");
+ exit(errno);
+ }
+ fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", old_pie_rate);
+
+ fprintf(stderr, "Counting 20 interrupts at:");
+ fflush(stderr);
+
+ /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
+ for (tmp=2; tmp<=64; tmp*=2) {
+
+ retval = ioctl(fd, RTC_IRQP_SET, tmp);
+ if (retval == -1) {
+ /* not all RTCs can change their periodic IRQ rate */
+ if (errno == EINVAL) {
+ fprintf(stderr,
+ "\n...Periodic IRQ rate is fixed\n");
+ goto done;
+ }
+ perror("RTC_IRQP_SET ioctl");
+ exit(errno);
+ }
+
+ fprintf(stderr, "\n%ldHz:\t", tmp);
+ fflush(stderr);
+
+ /* Enable periodic interrupts */
+ retval = ioctl(fd, RTC_PIE_ON, 0);
+ if (retval == -1) {
+ perror("RTC_PIE_ON ioctl");
+ exit(errno);
+ }
+
+ for (i=1; i<21; i++) {
+ gettimeofday(&start, NULL);
+ /* This blocks */
+ retval = read(fd, &data, sizeof(unsigned long));
+ if (retval == -1) {
+ perror("read");
+ exit(errno);
+ }
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ if (diff.tv_sec > 0 ||
+ diff.tv_usec > ((1000000L / tmp) * 1.10)) {
+ fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n",
+ diff.tv_sec, diff.tv_usec,
+ (1000000L / tmp));
+ fflush(stdout);
+ exit(-1);
+ }
+
+ fprintf(stderr, " %d",i);
+ fflush(stderr);
+ irqcount++;
+ }
+
+ /* Disable periodic interrupts */
+ retval = ioctl(fd, RTC_PIE_OFF, 0);
+ if (retval == -1) {
+ perror("RTC_PIE_OFF ioctl");
+ exit(errno);
+ }
+ }
+
+done:
+ ioctl(fd, RTC_IRQP_SET, old_pie_rate);
+
+ fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
+
+ close(fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c
deleted file mode 100644
index 411eff625e66..000000000000
--- a/tools/testing/selftests/timers/rtctest.c
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- * Real Time Clock Driver Test/Example Program
- *
- * Compile with:
- * gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest
- *
- * Copyright (C) 1996, Paul Gortmaker.
- *
- * Released under the GNU General Public License, version 2,
- * included herein by reference.
- *
- */
-
-#include <stdio.h>
-#include <linux/rtc.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <errno.h>
-
-#ifndef ARRAY_SIZE
-# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
-/*
- * This expects the new RTC class driver framework, working with
- * clocks that will often not be clones of what the PC-AT had.
- * Use the command line to specify another RTC if you need one.
- */
-static const char default_rtc[] = "/dev/rtc0";
-
-static struct rtc_time cutoff_dates[] = {
- {
- .tm_year = 70, /* 1970 -1900 */
- .tm_mday = 1,
- },
- /* signed time_t 19/01/2038 3:14:08 */
- {
- .tm_year = 138,
- .tm_mday = 19,
- },
- {
- .tm_year = 138,
- .tm_mday = 20,
- },
- {
- .tm_year = 199, /* 2099 -1900 */
- .tm_mday = 1,
- },
- {
- .tm_year = 200, /* 2100 -1900 */
- .tm_mday = 1,
- },
- /* unsigned time_t 07/02/2106 7:28:15*/
- {
- .tm_year = 205,
- .tm_mon = 1,
- .tm_mday = 7,
- },
- {
- .tm_year = 206,
- .tm_mon = 1,
- .tm_mday = 8,
- },
- /* signed time on 64bit in nanoseconds 12/04/2262 01:47:16*/
- {
- .tm_year = 362,
- .tm_mon = 3,
- .tm_mday = 12,
- },
- {
- .tm_year = 362, /* 2262 -1900 */
- .tm_mon = 3,
- .tm_mday = 13,
- },
-};
-
-static int compare_dates(struct rtc_time *a, struct rtc_time *b)
-{
- if (a->tm_year != b->tm_year ||
- a->tm_mon != b->tm_mon ||
- a->tm_mday != b->tm_mday ||
- a->tm_hour != b->tm_hour ||
- a->tm_min != b->tm_min ||
- ((b->tm_sec - a->tm_sec) > 1))
- return 1;
-
- return 0;
-}
-
-int main(int argc, char **argv)
-{
- int i, fd, retval, irqcount = 0, dangerous = 0;
- unsigned long tmp, data;
- struct rtc_time rtc_tm;
- const char *rtc = default_rtc;
- struct timeval start, end, diff;
-
- switch (argc) {
- case 3:
- if (*argv[2] == 'd')
- dangerous = 1;
- case 2:
- rtc = argv[1];
- /* FALLTHROUGH */
- case 1:
- break;
- default:
- fprintf(stderr, "usage: rtctest [rtcdev] [d]\n");
- return 1;
- }
-
- fd = open(rtc, O_RDONLY);
-
- if (fd == -1) {
- perror(rtc);
- exit(errno);
- }
-
- fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n");
-
- /* Turn on update interrupts (one per second) */
- retval = ioctl(fd, RTC_UIE_ON, 0);
- if (retval == -1) {
- if (errno == EINVAL) {
- fprintf(stderr,
- "\n...Update IRQs not supported.\n");
- goto test_READ;
- }
- perror("RTC_UIE_ON ioctl");
- exit(errno);
- }
-
- fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:",
- rtc);
- fflush(stderr);
- for (i=1; i<6; i++) {
- /* This read will block */
- retval = read(fd, &data, sizeof(unsigned long));
- if (retval == -1) {
- perror("read");
- exit(errno);
- }
- fprintf(stderr, " %d",i);
- fflush(stderr);
- irqcount++;
- }
-
- fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:");
- fflush(stderr);
- for (i=1; i<6; i++) {
- struct timeval tv = {5, 0}; /* 5 second timeout on select */
- fd_set readfds;
-
- FD_ZERO(&readfds);
- FD_SET(fd, &readfds);
- /* The select will wait until an RTC interrupt happens. */
- retval = select(fd+1, &readfds, NULL, NULL, &tv);
- if (retval == -1) {
- perror("select");
- exit(errno);
- }
- /* This read won't block unlike the select-less case above. */
- retval = read(fd, &data, sizeof(unsigned long));
- if (retval == -1) {
- perror("read");
- exit(errno);
- }
- fprintf(stderr, " %d",i);
- fflush(stderr);
- irqcount++;
- }
-
- /* Turn off update interrupts */
- retval = ioctl(fd, RTC_UIE_OFF, 0);
- if (retval == -1) {
- perror("RTC_UIE_OFF ioctl");
- exit(errno);
- }
-
-test_READ:
- /* Read the RTC time/date */
- retval = ioctl(fd, RTC_RD_TIME, &rtc_tm);
- if (retval == -1) {
- perror("RTC_RD_TIME ioctl");
- exit(errno);
- }
-
- fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
- rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
- rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
-
- /* Set the alarm to 5 sec in the future, and check for rollover */
- rtc_tm.tm_sec += 5;
- if (rtc_tm.tm_sec >= 60) {
- rtc_tm.tm_sec %= 60;
- rtc_tm.tm_min++;
- }
- if (rtc_tm.tm_min == 60) {
- rtc_tm.tm_min = 0;
- rtc_tm.tm_hour++;
- }
- if (rtc_tm.tm_hour == 24)
- rtc_tm.tm_hour = 0;
-
- retval = ioctl(fd, RTC_ALM_SET, &rtc_tm);
- if (retval == -1) {
- if (errno == EINVAL) {
- fprintf(stderr,
- "\n...Alarm IRQs not supported.\n");
- goto test_PIE;
- }
-
- perror("RTC_ALM_SET ioctl");
- exit(errno);
- }
-
- /* Read the current alarm settings */
- retval = ioctl(fd, RTC_ALM_READ, &rtc_tm);
- if (retval == -1) {
- if (errno == EINVAL) {
- fprintf(stderr,
- "\n...EINVAL reading current alarm setting.\n");
- goto test_PIE;
- }
- perror("RTC_ALM_READ ioctl");
- exit(errno);
- }
-
- fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n",
- rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
-
- /* Enable alarm interrupts */
- retval = ioctl(fd, RTC_AIE_ON, 0);
- if (retval == -1) {
- if (errno == EINVAL || errno == EIO) {
- fprintf(stderr,
- "\n...Alarm IRQs not supported.\n");
- goto test_PIE;
- }
-
- perror("RTC_AIE_ON ioctl");
- exit(errno);
- }
-
- fprintf(stderr, "Waiting 5 seconds for alarm...");
- fflush(stderr);
- /* This blocks until the alarm ring causes an interrupt */
- retval = read(fd, &data, sizeof(unsigned long));
- if (retval == -1) {
- perror("read");
- exit(errno);
- }
- irqcount++;
- fprintf(stderr, " okay. Alarm rang.\n");
-
- /* Disable alarm interrupts */
- retval = ioctl(fd, RTC_AIE_OFF, 0);
- if (retval == -1) {
- perror("RTC_AIE_OFF ioctl");
- exit(errno);
- }
-
-test_PIE:
- /* Read periodic IRQ rate */
- retval = ioctl(fd, RTC_IRQP_READ, &tmp);
- if (retval == -1) {
- /* not all RTCs support periodic IRQs */
- if (errno == EINVAL) {
- fprintf(stderr, "\nNo periodic IRQ support\n");
- goto test_DATE;
- }
- perror("RTC_IRQP_READ ioctl");
- exit(errno);
- }
- fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp);
-
- fprintf(stderr, "Counting 20 interrupts at:");
- fflush(stderr);
-
- /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
- for (tmp=2; tmp<=64; tmp*=2) {
-
- retval = ioctl(fd, RTC_IRQP_SET, tmp);
- if (retval == -1) {
- /* not all RTCs can change their periodic IRQ rate */
- if (errno == EINVAL) {
- fprintf(stderr,
- "\n...Periodic IRQ rate is fixed\n");
- goto test_DATE;
- }
- perror("RTC_IRQP_SET ioctl");
- exit(errno);
- }
-
- fprintf(stderr, "\n%ldHz:\t", tmp);
- fflush(stderr);
-
- /* Enable periodic interrupts */
- retval = ioctl(fd, RTC_PIE_ON, 0);
- if (retval == -1) {
- perror("RTC_PIE_ON ioctl");
- exit(errno);
- }
-
- for (i=1; i<21; i++) {
- gettimeofday(&start, NULL);
- /* This blocks */
- retval = read(fd, &data, sizeof(unsigned long));
- if (retval == -1) {
- perror("read");
- exit(errno);
- }
- gettimeofday(&end, NULL);
- timersub(&end, &start, &diff);
- if (diff.tv_sec > 0 ||
- diff.tv_usec > ((1000000L / tmp) * 1.10)) {
- fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n",
- diff.tv_sec, diff.tv_usec,
- (1000000L / tmp));
- fflush(stdout);
- exit(-1);
- }
-
- fprintf(stderr, " %d",i);
- fflush(stderr);
- irqcount++;
- }
-
- /* Disable periodic interrupts */
- retval = ioctl(fd, RTC_PIE_OFF, 0);
- if (retval == -1) {
- perror("RTC_PIE_OFF ioctl");
- exit(errno);
- }
- }
-
-test_DATE:
- if (!dangerous)
- goto done;
-
- fprintf(stderr, "\nTesting problematic dates\n");
-
- for (i = 0; i < ARRAY_SIZE(cutoff_dates); i++) {
- struct rtc_time current;
-
- /* Write the new date in RTC */
- retval = ioctl(fd, RTC_SET_TIME, &cutoff_dates[i]);
- if (retval == -1) {
- perror("RTC_SET_TIME ioctl");
- close(fd);
- exit(errno);
- }
-
- /* Read back */
- retval = ioctl(fd, RTC_RD_TIME, &current);
- if (retval == -1) {
- perror("RTC_RD_TIME ioctl");
- exit(errno);
- }
-
- if(compare_dates(&cutoff_dates[i], &current)) {
- fprintf(stderr,"Setting date %d failed\n",
- cutoff_dates[i].tm_year + 1900);
- goto done;
- }
-
- cutoff_dates[i].tm_sec += 5;
-
- /* Write the new alarm in RTC */
- retval = ioctl(fd, RTC_ALM_SET, &cutoff_dates[i]);
- if (retval == -1) {
- perror("RTC_ALM_SET ioctl");
- close(fd);
- exit(errno);
- }
-
- /* Read back */
- retval = ioctl(fd, RTC_ALM_READ, &current);
- if (retval == -1) {
- perror("RTC_ALM_READ ioctl");
- exit(errno);
- }
-
- if(compare_dates(&cutoff_dates[i], &current)) {
- fprintf(stderr,"Setting alarm %d failed\n",
- cutoff_dates[i].tm_year + 1900);
- goto done;
- }
-
- fprintf(stderr, "Setting year %d is OK \n",
- cutoff_dates[i].tm_year + 1900);
- }
-done:
- fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
-
- close(fd);
-
- return 0;
-}
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index d744991c0f4f..186520198de7 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -8,10 +8,11 @@ include ../lib.mk
UNAME_M := $(shell uname -m)
CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
- protection_keys test_vdso test_vsyscall
+ protection_keys test_vdso test_vsyscall mov_ss_trap
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
@@ -31,7 +32,12 @@ BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
-CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie
+CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
+
+# call32_from_64 in thunks.S uses absolute addresses.
+ifeq ($(CAN_BUILD_WITH_NOPIE),1)
+CFLAGS += -no-pie
+endif
define gen-target-rule-32
$(1) $(1)_32: $(OUTPUT)/$(1)_32
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
new file mode 100644
index 000000000000..3c3a022654f3
--- /dev/null
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS
+ *
+ * This does MOV SS from a watchpointed address followed by various
+ * types of kernel entries. A MOV SS that hits a watchpoint will queue
+ * up a #DB trap but will not actually deliver that trap. The trap
+ * will be delivered after the next instruction instead. The CPU's logic
+ * seems to be:
+ *
+ * - Any fault: drop the pending #DB trap.
+ * - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then
+ * deliver #DB.
+ * - ICEBP: enter the kernel but do not deliver the watchpoint trap
+ * - breakpoint: only one #DB is delivered (phew!)
+ *
+ * There are plenty of ways for a kernel to handle this incorrectly. This
+ * test tries to exercise all the cases.
+ *
+ * This should mostly cover CVE-2018-1087 and CVE-2018-8897.
+ */
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/user.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <setjmp.h>
+#include <sys/prctl.h>
+
+#define X86_EFLAGS_RF (1UL << 16)
+
+#if __x86_64__
+# define REG_IP REG_RIP
+#else
+# define REG_IP REG_EIP
+#endif
+
+unsigned short ss;
+extern unsigned char breakpoint_insn[];
+sigjmp_buf jmpbuf;
+static unsigned char altstack_data[SIGSTKSZ];
+
+static void enable_watchpoint(void)
+{
+ pid_t parent = getpid();
+ int status;
+
+ pid_t child = fork();
+ if (child < 0)
+ err(1, "fork");
+
+ if (child) {
+ if (waitpid(child, &status, 0) != child)
+ err(1, "waitpid for child");
+ } else {
+ unsigned long dr0, dr1, dr7;
+
+ dr0 = (unsigned long)&ss;
+ dr1 = (unsigned long)breakpoint_insn;
+ dr7 = ((1UL << 1) | /* G0 */
+ (3UL << 16) | /* RW0 = read or write */
+ (1UL << 18) | /* LEN0 = 2 bytes */
+ (1UL << 3)); /* G1, RW1 = insn */
+
+ if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0)
+ err(1, "PTRACE_ATTACH");
+
+ if (waitpid(parent, &status, 0) != parent)
+ err(1, "waitpid for child");
+
+ if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0)
+ err(1, "PTRACE_POKEUSER DR0");
+
+ if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0)
+ err(1, "PTRACE_POKEUSER DR1");
+
+ if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0)
+ err(1, "PTRACE_POKEUSER DR7");
+
+ printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7);
+
+ if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0)
+ err(1, "PTRACE_DETACH");
+
+ exit(0);
+ }
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static char const * const signames[] = {
+ [SIGSEGV] = "SIGSEGV",
+ [SIGBUS] = "SIBGUS",
+ [SIGTRAP] = "SIGTRAP",
+ [SIGILL] = "SIGILL",
+};
+
+static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = ctx_void;
+
+ printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n",
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP],
+ !!(ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF));
+}
+
+static void handle_and_return(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = ctx_void;
+
+ printf("\tGot %s with RIP=%lx\n", signames[sig],
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+}
+
+static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = ctx_void;
+
+ printf("\tGot %s with RIP=%lx\n", signames[sig],
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+
+ siglongjmp(jmpbuf, 1);
+}
+
+int main()
+{
+ unsigned long nr;
+
+ asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss));
+ printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss);
+
+ if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0)
+ printf("\tPR_SET_PTRACER_ANY succeeded\n");
+
+ printf("\tSet up a watchpoint\n");
+ sethandler(SIGTRAP, sigtrap, 0);
+ enable_watchpoint();
+
+ printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n");
+ asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; INT3\n");
+ asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; INT 3\n");
+ asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; CS CS INT3\n");
+ asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; CSx14 INT3\n");
+ asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; INT 4\n");
+ sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss));
+
+#ifdef __i386__
+ printf("[RUN]\tMOV SS; INTO\n");
+ sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
+ nr = -1;
+ asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into"
+ : [tmp] "+r" (nr) : [ss] "m" (ss));
+#endif
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; ICEBP\n");
+
+ /* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */
+ sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
+
+ asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss));
+ }
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; CLI\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss));
+ }
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; #PF\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]"
+ : [tmp] "=r" (nr) : [ss] "m" (ss));
+ }
+
+ /*
+ * INT $1: if #DB has DPL=3 and there isn't special handling,
+ * then the kernel will die.
+ */
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; INT 1\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss));
+ }
+
+#ifdef __x86_64__
+ /*
+ * In principle, we should test 32-bit SYSCALL as well, but
+ * the calling convention is so unpredictable that it's
+ * not obviously worth the effort.
+ */
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; SYSCALL\n");
+ sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
+ nr = SYS_getpid;
+ /*
+ * Toggle the high bit of RSP to make it noncanonical to
+ * strengthen this test on non-SMAP systems.
+ */
+ asm volatile ("btc $63, %%rsp\n\t"
+ "mov %[ss], %%ss; syscall\n\t"
+ "btc $63, %%rsp"
+ : "+a" (nr) : [ss] "m" (ss)
+ : "rcx"
+#ifdef __x86_64__
+ , "r11"
+#endif
+ );
+ }
+#endif
+
+ printf("[RUN]\tMOV SS; breakpointed NOP\n");
+ asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss));
+
+ /*
+ * Invoking SYSENTER directly breaks all the rules. Just handle
+ * the SIGSEGV.
+ */
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; SYSENTER\n");
+ stack_t stack = {
+ .ss_sp = altstack_data,
+ .ss_size = SIGSTKSZ,
+ };
+ if (sigaltstack(&stack, NULL) != 0)
+ err(1, "sigaltstack");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
+ nr = SYS_getpid;
+ asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr)
+ : [ss] "m" (ss) : "flags", "rcx"
+#ifdef __x86_64__
+ , "r11"
+#endif
+ );
+
+ /* We're unreachable here. SYSENTER forgets RIP. */
+ }
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; INT $0x80\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ nr = 20; /* compat getpid */
+ asm volatile ("mov %[ss], %%ss; int $0x80"
+ : "+a" (nr) : [ss] "m" (ss)
+ : "flags"
+#ifdef __x86_64__
+ , "r8", "r9", "r10", "r11"
+#endif
+ );
+ }
+
+ printf("[OK]\tI aten't dead\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
index 9c0325e1ea68..50f7e9272481 100644
--- a/tools/testing/selftests/x86/mpx-mini-test.c
+++ b/tools/testing/selftests/x86/mpx-mini-test.c
@@ -368,6 +368,11 @@ static int expected_bnd_index = -1;
uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR 3
+#endif
+
/*
* The kernel is supposed to provide some information about the bounds
* exception in the siginfo. It should match what we have in the bounds
@@ -419,8 +424,6 @@ void handler(int signum, siginfo_t *si, void *vucontext)
br_count++;
dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
-#define SEGV_BNDERR 3 /* failed address bound checks */
-
dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
status, ip, br_reason);
dprintf2("si_signo: %d\n", si->si_signo);
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
index b3cb7670e026..254e5436bdd9 100644
--- a/tools/testing/selftests/x86/pkey-helpers.h
+++ b/tools/testing/selftests/x86/pkey-helpers.h
@@ -26,30 +26,26 @@ static inline void sigsafe_printf(const char *format, ...)
{
va_list ap;
- va_start(ap, format);
if (!dprint_in_signal) {
+ va_start(ap, format);
vprintf(format, ap);
+ va_end(ap);
} else {
int ret;
- int len = vsnprintf(dprint_in_signal_buffer,
- DPRINT_IN_SIGNAL_BUF_SIZE,
- format, ap);
/*
- * len is amount that would have been printed,
- * but actual write is truncated at BUF_SIZE.
+ * No printf() functions are signal-safe.
+ * They deadlock easily. Write the format
+ * string to get some output, even if
+ * incomplete.
*/
- if (len > DPRINT_IN_SIGNAL_BUF_SIZE)
- len = DPRINT_IN_SIGNAL_BUF_SIZE;
- ret = write(1, dprint_in_signal_buffer, len);
+ ret = write(1, format, strlen(format));
if (ret < 0)
- abort();
+ exit(1);
}
- va_end(ap);
}
#define dprintf_level(level, args...) do { \
if (level <= DEBUG_LEVEL) \
sigsafe_printf(args); \
- fflush(NULL); \
} while (0)
#define dprintf0(args...) dprintf_level(0, args)
#define dprintf1(args...) dprintf_level(1, args)
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index f15aa5a76fe3..460b4bdf4c1e 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -72,10 +72,9 @@ extern void abort_hooks(void);
test_nr, iteration_nr); \
dprintf0("errno at assert: %d", errno); \
abort_hooks(); \
- assert(condition); \
+ exit(__LINE__); \
} \
} while (0)
-#define raw_assert(cond) assert(cond)
void cat_into_file(char *str, char *file)
{
@@ -87,12 +86,17 @@ void cat_into_file(char *str, char *file)
* these need to be raw because they are called under
* pkey_assert()
*/
- raw_assert(fd >= 0);
+ if (fd < 0) {
+ fprintf(stderr, "error opening '%s'\n", str);
+ perror("error: ");
+ exit(__LINE__);
+ }
+
ret = write(fd, str, strlen(str));
if (ret != strlen(str)) {
perror("write to file failed");
fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
- raw_assert(0);
+ exit(__LINE__);
}
close(fd);
}
@@ -191,26 +195,30 @@ void lots_o_noops_around_write(int *write_to_me)
#ifdef __i386__
#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
+# define SYS_mprotect_key 380
#endif
+
#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 381
-# define SYS_pkey_free 382
+# define SYS_pkey_alloc 381
+# define SYS_pkey_free 382
#endif
-#define REG_IP_IDX REG_EIP
-#define si_pkey_offset 0x14
+
+#define REG_IP_IDX REG_EIP
+#define si_pkey_offset 0x14
#else
#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
+# define SYS_mprotect_key 329
#endif
+
#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 330
-# define SYS_pkey_free 331
+# define SYS_pkey_alloc 330
+# define SYS_pkey_free 331
#endif
-#define REG_IP_IDX REG_RIP
-#define si_pkey_offset 0x20
+
+#define REG_IP_IDX REG_RIP
+#define si_pkey_offset 0x20
#endif
@@ -225,8 +233,14 @@ void dump_mem(void *dumpme, int len_bytes)
}
}
-#define SEGV_BNDERR 3 /* failed address bound checks */
-#define SEGV_PKUERR 4
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR 3
+#endif
+
+#ifndef SEGV_PKUERR
+# define SEGV_PKUERR 4
+#endif
static char *si_code_str(int si_code)
{
@@ -289,13 +303,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
dump_mem(pkru_ptr - 128, 256);
pkey_assert(*pkru_ptr);
- si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
- dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
- dump_mem(si_pkey_ptr - 8, 24);
- siginfo_pkey = *si_pkey_ptr;
- pkey_assert(siginfo_pkey < NR_PKEYS);
- last_si_pkey = siginfo_pkey;
-
if ((si->si_code == SEGV_MAPERR) ||
(si->si_code == SEGV_ACCERR) ||
(si->si_code == SEGV_BNDERR)) {
@@ -303,6 +310,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
exit(4);
}
+ si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
+ dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
+ dump_mem((u8 *)si_pkey_ptr - 8, 24);
+ siginfo_pkey = *si_pkey_ptr;
+ pkey_assert(siginfo_pkey < NR_PKEYS);
+ last_si_pkey = siginfo_pkey;
+
dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
/* need __rdpkru() version so we do not do shadow_pkru checking */
dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
@@ -311,22 +325,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
pkru_faults++;
dprintf1("<<<<==================================================\n");
- return;
- if (trapno == 14) {
- fprintf(stderr,
- "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
- trapno, ip);
- fprintf(stderr, "si_addr %p\n", si->si_addr);
- fprintf(stderr, "REG_ERR: %lx\n",
- (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
- exit(1);
- } else {
- fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip);
- fprintf(stderr, "si_addr %p\n", si->si_addr);
- fprintf(stderr, "REG_ERR: %lx\n",
- (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
- exit(2);
- }
dprint_in_signal = 0;
}
@@ -393,10 +391,15 @@ pid_t fork_lazy_child(void)
return forkret;
}
-#define PKEY_DISABLE_ACCESS 0x1
-#define PKEY_DISABLE_WRITE 0x2
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS 0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE 0x2
+#endif
-u32 pkey_get(int pkey, unsigned long flags)
+static u32 hw_pkey_get(int pkey, unsigned long flags)
{
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
u32 pkru = __rdpkru();
@@ -418,7 +421,7 @@ u32 pkey_get(int pkey, unsigned long flags)
return masked_pkru;
}
-int pkey_set(int pkey, unsigned long rights, unsigned long flags)
+static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
{
u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
u32 old_pkru = __rdpkru();
@@ -452,15 +455,15 @@ void pkey_disable_set(int pkey, int flags)
pkey, flags);
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
- pkey_rights = pkey_get(pkey, syscall_flags);
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
pkey_assert(pkey_rights >= 0);
pkey_rights |= flags;
- ret = pkey_set(pkey, pkey_rights, syscall_flags);
+ ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
assert(!ret);
/*pkru and flags have the same format */
shadow_pkru |= flags << (pkey * 2);
@@ -468,8 +471,8 @@ void pkey_disable_set(int pkey, int flags)
pkey_assert(ret >= 0);
- pkey_rights = pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
@@ -483,24 +486,24 @@ void pkey_disable_clear(int pkey, int flags)
{
unsigned long syscall_flags = 0;
int ret;
- int pkey_rights = pkey_get(pkey, syscall_flags);
+ int pkey_rights = hw_pkey_get(pkey, syscall_flags);
u32 orig_pkru = rdpkru();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
- dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
pkey_assert(pkey_rights >= 0);
pkey_rights |= flags;
- ret = pkey_set(pkey, pkey_rights, 0);
+ ret = hw_pkey_set(pkey, pkey_rights, 0);
/* pkru and flags have the same format */
shadow_pkru &= ~(flags << (pkey * 2));
pkey_assert(ret >= 0);
- pkey_rights = pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) pkey_get(%d): %x\n", __func__,
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
pkey, pkey, pkey_rights);
dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
@@ -674,10 +677,12 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
struct pkey_malloc_record {
void *ptr;
long size;
+ int prot;
};
struct pkey_malloc_record *pkey_malloc_records;
+struct pkey_malloc_record *pkey_last_malloc_record;
long nr_pkey_malloc_records;
-void record_pkey_malloc(void *ptr, long size)
+void record_pkey_malloc(void *ptr, long size, int prot)
{
long i;
struct pkey_malloc_record *rec = NULL;
@@ -709,6 +714,8 @@ void record_pkey_malloc(void *ptr, long size)
(int)(rec - pkey_malloc_records), rec, ptr, size);
rec->ptr = ptr;
rec->size = size;
+ rec->prot = prot;
+ pkey_last_malloc_record = rec;
nr_pkey_malloc_records++;
}
@@ -753,7 +760,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
pkey_assert(ptr != (void *)-1);
ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
pkey_assert(!ret);
- record_pkey_malloc(ptr, size);
+ record_pkey_malloc(ptr, size, prot);
rdpkru();
dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
@@ -774,7 +781,7 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
size = ALIGN_UP(size, HPAGE_SIZE * 2);
ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
pkey_assert(ptr != (void *)-1);
- record_pkey_malloc(ptr, size);
+ record_pkey_malloc(ptr, size, prot);
mprotect_pkey(ptr, size, prot, pkey);
dprintf1("unaligned ptr: %p\n", ptr);
@@ -847,7 +854,7 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
pkey_assert(ptr != (void *)-1);
mprotect_pkey(ptr, size, prot, pkey);
- record_pkey_malloc(ptr, size);
+ record_pkey_malloc(ptr, size, prot);
dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
return ptr;
@@ -869,7 +876,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
mprotect_pkey(ptr, size, prot, pkey);
- record_pkey_malloc(ptr, size);
+ record_pkey_malloc(ptr, size, prot);
dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
close(fd);
@@ -918,13 +925,21 @@ void *malloc_pkey(long size, int prot, u16 pkey)
}
int last_pkru_faults;
+#define UNKNOWN_PKEY -2
void expected_pk_fault(int pkey)
{
dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
__func__, last_pkru_faults, pkru_faults);
dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
pkey_assert(last_pkru_faults + 1 == pkru_faults);
- pkey_assert(last_si_pkey == pkey);
+
+ /*
+ * For exec-only memory, we do not know the pkey in
+ * advance, so skip this check.
+ */
+ if (pkey != UNKNOWN_PKEY)
+ pkey_assert(last_si_pkey == pkey);
+
/*
* The signal handler shold have cleared out PKRU to let the
* test program continue. We now have to restore it.
@@ -939,10 +954,11 @@ void expected_pk_fault(int pkey)
last_si_pkey = -1;
}
-void do_not_expect_pk_fault(void)
-{
- pkey_assert(last_pkru_faults == pkru_faults);
-}
+#define do_not_expect_pk_fault(msg) do { \
+ if (last_pkru_faults != pkru_faults) \
+ dprintf0("unexpected PK fault: %s\n", msg); \
+ pkey_assert(last_pkru_faults == pkru_faults); \
+} while (0)
int test_fds[10] = { -1 };
int nr_test_fds;
@@ -1151,12 +1167,15 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
pkey_assert(i < NR_PKEYS*2);
/*
- * There are 16 pkeys supported in hardware. One is taken
- * up for the default (0) and another can be taken up by
- * an execute-only mapping. Ensure that we can allocate
- * at least 14 (16-2).
+ * There are 16 pkeys supported in hardware. Three are
+ * allocated by the time we get here:
+ * 1. The default key (0)
+ * 2. One possibly consumed by an execute-only mapping.
+ * 3. One allocated by the test code and passed in via
+ * 'pkey' to this function.
+ * Ensure that we can allocate at least another 13 (16-3).
*/
- pkey_assert(i >= NR_PKEYS-2);
+ pkey_assert(i >= NR_PKEYS-3);
for (i = 0; i < nr_allocated_pkeys; i++) {
err = sys_pkey_free(allocated_pkeys[i]);
@@ -1165,6 +1184,35 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
}
}
+/*
+ * pkey 0 is special. It is allocated by default, so you do not
+ * have to call pkey_alloc() to use it first. Make sure that it
+ * is usable.
+ */
+void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
+{
+ long size;
+ int prot;
+
+ assert(pkey_last_malloc_record);
+ size = pkey_last_malloc_record->size;
+ /*
+ * This is a bit of a hack. But mprotect() requires
+ * huge-page-aligned sizes when operating on hugetlbfs.
+ * So, make sure that we use something that's a multiple
+ * of a huge page when we can.
+ */
+ if (size >= HPAGE_SIZE)
+ size = HPAGE_SIZE;
+ prot = pkey_last_malloc_record->prot;
+
+ /* Use pkey 0 */
+ mprotect_pkey(ptr, size, prot, 0);
+
+ /* Make sure that we can set it back to the original pkey. */
+ mprotect_pkey(ptr, size, prot, pkey);
+}
+
void test_ptrace_of_child(int *ptr, u16 pkey)
{
__attribute__((__unused__)) int peek_result;
@@ -1228,7 +1276,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
pkey_assert(ret != -1);
/* Now access from the current task, and expect NO exception: */
peek_result = read_ptr(plain_ptr);
- do_not_expect_pk_fault();
+ do_not_expect_pk_fault("read plain pointer after ptrace");
ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
pkey_assert(ret != -1);
@@ -1241,12 +1289,9 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
free(plain_ptr_unaligned);
}
-void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+void *get_pointer_to_instructions(void)
{
void *p1;
- int scratch;
- int ptr_contents;
- int ret;
p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
@@ -1256,7 +1301,23 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
/* Point 'p1' at the *second* page of the function: */
p1 += PAGE_SIZE;
+ /*
+ * Try to ensure we fault this in on next touch to ensure
+ * we get an instruction fault as opposed to a data one
+ */
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+
+ return p1;
+}
+
+void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+{
+ void *p1;
+ int scratch;
+ int ptr_contents;
+ int ret;
+
+ p1 = get_pointer_to_instructions();
lots_o_noops_around_write(&scratch);
ptr_contents = read_ptr(p1);
dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
@@ -1272,12 +1333,55 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
*/
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault();
+ do_not_expect_pk_fault("executing on PROT_EXEC memory");
ptr_contents = read_ptr(p1);
dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
expected_pk_fault(pkey);
}
+void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
+{
+ void *p1;
+ int scratch;
+ int ptr_contents;
+ int ret;
+
+ dprintf1("%s() start\n", __func__);
+
+ p1 = get_pointer_to_instructions();
+ lots_o_noops_around_write(&scratch);
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+
+ /* Use a *normal* mprotect(), not mprotect_pkey(): */
+ ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
+ pkey_assert(!ret);
+
+ dprintf2("pkru: %x\n", rdpkru());
+
+ /* Make sure this is an *instruction* fault */
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+ lots_o_noops_around_write(&scratch);
+ do_not_expect_pk_fault("executing on PROT_EXEC memory");
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+ expected_pk_fault(UNKNOWN_PKEY);
+
+ /*
+ * Put the memory back to non-PROT_EXEC. Should clear the
+ * exec-only pkey off the VMA and allow it to be readable
+ * again. Go to PROT_NONE first to check for a kernel bug
+ * that did not clear the pkey when doing PROT_NONE.
+ */
+ ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
+ pkey_assert(!ret);
+
+ ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
+ pkey_assert(!ret);
+ ptr_contents = read_ptr(p1);
+ do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
+}
+
void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
{
int size = PAGE_SIZE;
@@ -1302,6 +1406,8 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
test_kernel_gup_of_access_disabled_region,
test_kernel_gup_write_to_write_disabled_region,
test_executing_on_unreadable_memory,
+ test_implicit_mprotect_exec_only_memory,
+ test_mprotect_with_pkey_0,
test_ptrace_of_child,
test_pkey_syscalls_on_non_allocated_pkey,
test_pkey_syscalls_bad_args,
diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c
index 40370354d4c1..c9c3281077bc 100644
--- a/tools/testing/selftests/x86/test_syscall_vdso.c
+++ b/tools/testing/selftests/x86/test_syscall_vdso.c
@@ -100,12 +100,19 @@ asm (
" shl $32, %r8\n"
" orq $0x7f7f7f7f, %r8\n"
" movq %r8, %r9\n"
- " movq %r8, %r10\n"
- " movq %r8, %r11\n"
- " movq %r8, %r12\n"
- " movq %r8, %r13\n"
- " movq %r8, %r14\n"
- " movq %r8, %r15\n"
+ " incq %r9\n"
+ " movq %r9, %r10\n"
+ " incq %r10\n"
+ " movq %r10, %r11\n"
+ " incq %r11\n"
+ " movq %r11, %r12\n"
+ " incq %r12\n"
+ " movq %r12, %r13\n"
+ " incq %r13\n"
+ " movq %r13, %r14\n"
+ " incq %r14\n"
+ " movq %r14, %r15\n"
+ " incq %r15\n"
" ret\n"
" .code32\n"
" .popsection\n"
@@ -128,12 +135,13 @@ int check_regs64(void)
int err = 0;
int num = 8;
uint64_t *r64 = &regs64.r8;
+ uint64_t expected = 0x7f7f7f7f7f7f7f7fULL;
if (!kernel_is_64bit)
return 0;
do {
- if (*r64 == 0x7f7f7f7f7f7f7f7fULL)
+ if (*r64 == expected++)
continue; /* register did not change */
if (syscall_addr != (long)&int80) {
/*
@@ -147,18 +155,17 @@ int check_regs64(void)
continue;
}
} else {
- /* INT80 syscall entrypoint can be used by
+ /*
+ * INT80 syscall entrypoint can be used by
* 64-bit programs too, unlike SYSCALL/SYSENTER.
* Therefore it must preserve R12+
* (they are callee-saved registers in 64-bit C ABI).
*
- * This was probably historically not intended,
- * but R8..11 are clobbered (cleared to 0).
- * IOW: they are the only registers which aren't
- * preserved across INT80 syscall.
+ * Starting in Linux 4.17 (and any kernel that
+ * backports the change), R8..11 are preserved.
+ * Historically (and probably unintentionally), they
+ * were clobbered or zeroed.
*/
- if (*r64 == 0 && num <= 11)
- continue;
}
printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
err++;
diff --git a/tools/testing/selftests/x86/trivial_program.c b/tools/testing/selftests/x86/trivial_program.c
new file mode 100644
index 000000000000..46a447163b93
--- /dev/null
+++ b/tools/testing/selftests/x86/trivial_program.c
@@ -0,0 +1,10 @@
+/* Trivial program to check that compilation with certain flags is working. */
+
+#include <stdio.h>
+
+int
+main(void)
+{
+ puts("");
+ return 0;
+}