aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/arm64/include/asm/sysreg.h65
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h5
-rw-r--r--tools/arch/arm64/include/uapi/asm/unistd.h24
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h29
-rw-r--r--tools/arch/x86/include/asm/msr-index.h31
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h5
-rw-r--r--tools/arch/x86/include/uapi/asm/svm.h2
-rw-r--r--tools/arch/x86/lib/memset_64.S3
-rw-r--r--tools/arch/x86/lib/x86-opcode-map.txt4
-rw-r--r--tools/hv/hv_kvp_daemon.c108
-rw-r--r--tools/include/linux/cfi_types.h45
-rw-r--r--tools/include/uapi/asm-generic/mman-common.h1
-rw-r--r--tools/include/uapi/asm-generic/unistd.h4
-rw-r--r--tools/include/uapi/linux/bpf.h3
-rw-r--r--tools/include/uapi/linux/in.h2
-rw-r--r--tools/include/uapi/linux/kvm.h9
-rw-r--r--tools/include/uapi/linux/perf_event.h2
-rw-r--r--tools/include/uapi/linux/stat.h99
-rw-r--r--tools/lib/perf/Makefile14
-rw-r--r--tools/net/ynl/lib/ynl.c2
-rwxr-xr-xtools/net/ynl/pyynl/ethtool.py22
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py95
-rw-r--r--tools/objtool/arch/x86/decode.c9
-rw-r--r--tools/objtool/check.c2
-rw-r--r--tools/perf/Makefile.config1
-rw-r--r--tools/perf/arch/arm/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl1
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/sh/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/sparc/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_32.tbl3
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--tools/perf/arch/xtensa/entry/syscalls/syscall.tbl1
-rwxr-xr-xtools/perf/check-headers.sh1
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h2
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fcntl.h4
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fs.h21
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/mount.h10
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/prctl.h11
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/stat.h99
-rw-r--r--tools/perf/trace/beauty/include/uapi/sound/asound.h8
-rw-r--r--tools/perf/util/evsel.c22
-rw-r--r--tools/perf/util/unwind-libunwind-local.c2
-rw-r--r--tools/sched_ext/scx_flatcg.bpf.c2
-rw-r--r--tools/scripts/syscall.tbl1
-rw-r--r--tools/testing/cxl/test/mem.c2
-rw-r--r--tools/testing/kunit/configs/all_tests.config4
-rw-r--r--tools/testing/kunit/qemu_configs/sh.py4
-rw-r--r--tools/testing/memblock/tests/basic_api.c102
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/for_each.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h2
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_hash_modify.c30
-rw-r--r--tools/testing/selftests/cgroup/Makefile21
-rw-r--r--tools/testing/selftests/cgroup/lib/cgroup_util.c (renamed from tools/testing/selftests/cgroup/cgroup_util.c)118
-rw-r--r--tools/testing/selftests/cgroup/lib/include/cgroup_util.h (renamed from tools/testing/selftests/cgroup/cgroup_util.h)13
-rw-r--r--tools/testing/selftests/cgroup/lib/libcgroup.mk19
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c78
l---------tools/testing/selftests/drivers/net/dsa/tc_taprio.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/iou-zcrx.py4
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c55
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/psfp.sh8
-rwxr-xr-xtools/testing/selftests/drivers/net/ping.py45
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c57
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc23
-rw-r--r--tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc20
-rw-r--r--tools/testing/selftests/kvm/Makefile2
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm24
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c281
-rw-r--r--tools/testing/selftests/kvm/arm64/host_sve.c127
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c85
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h41
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h7
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/processor.h141
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/ucall.h20
-rw-r--r--tools/testing/selftests/kvm/include/lru_gen_util.h51
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h23
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h1
-rw-r--r--tools/testing/selftests/kvm/include/x86/processor.h1
-rw-r--r--tools/testing/selftests/kvm/include/x86/sev.h53
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c24
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/exception.S59
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/processor.c346
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/ucall.c38
-rw-r--r--tools/testing/selftests/kvm/lib/lru_gen_util.c387
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/handlers.S139
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c2
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c42
-rw-r--r--tools/testing/selftests/kvm/lib/x86/processor.c4
-rw-r--r--tools/testing/selftests/kvm/lib/x86/sev.c76
-rw-r--r--tools/testing/selftests/kvm/riscv/arch_timer.c2
-rw-r--r--tools/testing/selftests/kvm/riscv/ebreak_test.c2
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c132
-rw-r--r--tools/testing/selftests/kvm/riscv/sbi_pmu_test.c24
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/fastops_test.c165
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_cpuid.c21
-rw-r--r--tools/testing/selftests/kvm/x86/kvm_buslock_test.c135
-rw-r--r--tools/testing/selftests/kvm/x86/sev_init2_tests.c13
-rw-r--r--tools/testing/selftests/kvm/x86/sev_smoke_test.c75
-rw-r--r--tools/testing/selftests/landlock/audit.h21
-rw-r--r--tools/testing/selftests/landlock/audit_test.c154
-rw-r--r--tools/testing/selftests/landlock/fs_test.c3
-rw-r--r--tools/testing/selftests/lib/config1
-rw-r--r--tools/testing/selftests/mincore/mincore_selftest.c16
-rwxr-xr-xtools/testing/selftests/mm/charge_reserved_hugetlb.sh4
-rw-r--r--tools/testing/selftests/mm/compaction_test.c19
-rw-r--r--tools/testing/selftests/mm/cow.c2
-rw-r--r--tools/testing/selftests/mm/guard-regions.c16
-rwxr-xr-xtools/testing/selftests/mm/hugetlb_reparenting_test.sh2
-rw-r--r--tools/testing/selftests/mm/pkey-powerpc.h14
-rw-r--r--tools/testing/selftests/mm/pkey_util.c1
-rw-r--r--tools/testing/selftests/net/Makefile1
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh34
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh96
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_taprio.sh421
-rw-r--r--tools/testing/selftests/net/forwarding/tsn_lib.sh26
-rwxr-xr-xtools/testing/selftests/net/gre_ipv6_lladdr.sh177
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh5
-rw-r--r--tools/testing/selftests/pcie_bwctrl/Makefile3
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/actions.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json260
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json24
-rw-r--r--tools/testing/selftests/ublk/Makefile10
-rw-r--r--tools/testing/selftests/ublk/fault_inject.c98
-rw-r--r--tools/testing/selftests/ublk/kublk.c356
-rw-r--r--tools/testing/selftests/ublk/kublk.h47
-rw-r--r--tools/testing/selftests/ublk/stripe.c28
-rwxr-xr-xtools/testing/selftests/ublk/test_common.sh146
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_04.sh40
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_05.sh44
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_06.sh41
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_07.sh28
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_01.sh8
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_02.sh8
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_03.sh8
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_04.sh9
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_05.sh8
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_01.sh45
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_02.sh45
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_03.sh38
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_04.sh37
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_05.sh64
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_01.sh12
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_02.sh13
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_03.sh12
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_04.sh13
-rw-r--r--tools/testing/selftests/x86/bugs/Makefile3
-rwxr-xr-xtools/testing/selftests/x86/bugs/common.py164
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_indirect_alignment.py150
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_permutations.py109
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_ret_alignment.py139
-rwxr-xr-xtools/testing/selftests/x86/bugs/its_sysfs.py65
-rw-r--r--tools/testing/shared/linux.c4
-rw-r--r--tools/testing/shared/linux/cleanup.h2
-rw-r--r--tools/testing/vsock/vsock_test.c28
163 files changed, 6396 insertions, 830 deletions
diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h
index b6c5ece4fdee..690b6ebd118f 100644
--- a/tools/arch/arm64/include/asm/sysreg.h
+++ b/tools/arch/arm64/include/asm/sysreg.h
@@ -117,6 +117,7 @@
#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31)
+/* Data cache zero operations */
#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
#define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4)
#define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6)
@@ -153,11 +154,13 @@
#define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3)
#define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5)
-/* Data cache zero operations */
#define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1)
#define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3)
#define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4)
+#define SYS_DC_CIVAPS sys_insn(1, 0, 7, 15, 1)
+#define SYS_DC_CIGDVAPS sys_insn(1, 0, 7, 15, 5)
+
/*
* Automatically generated definitions for system registers, the
* manual encodings below are in the process of being converted to
@@ -475,6 +478,7 @@
#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0)
#define SYS_CNTPCT_EL0 sys_reg(3, 3, 14, 0, 1)
+#define SYS_CNTVCT_EL0 sys_reg(3, 3, 14, 0, 2)
#define SYS_CNTPCTSS_EL0 sys_reg(3, 3, 14, 0, 5)
#define SYS_CNTVCTSS_EL0 sys_reg(3, 3, 14, 0, 6)
@@ -482,23 +486,36 @@
#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1)
#define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2)
+#define SYS_CNTV_TVAL_EL0 sys_reg(3, 3, 14, 3, 0)
#define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1)
#define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2)
#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0)
#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1)
#define SYS_AARCH32_CNTPCT sys_reg(0, 0, 0, 14, 0)
+#define SYS_AARCH32_CNTVCT sys_reg(0, 1, 0, 14, 0)
#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0)
#define SYS_AARCH32_CNTPCTSS sys_reg(0, 8, 0, 14, 0)
+#define SYS_AARCH32_CNTVCTSS sys_reg(0, 9, 0, 14, 0)
#define __PMEV_op2(n) ((n) & 0x7)
#define __CNTR_CRm(n) (0x8 | (((n) >> 3) & 0x3))
+#define SYS_PMEVCNTSVRn_EL1(n) sys_reg(2, 0, 14, __CNTR_CRm(n), __PMEV_op2(n))
#define SYS_PMEVCNTRn_EL0(n) sys_reg(3, 3, 14, __CNTR_CRm(n), __PMEV_op2(n))
#define __TYPER_CRm(n) (0xc | (((n) >> 3) & 0x3))
#define SYS_PMEVTYPERn_EL0(n) sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n))
#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7)
+#define SYS_SPMCGCRn_EL1(n) sys_reg(2, 0, 9, 13, ((n) & 1))
+
+#define __SPMEV_op2(n) ((n) & 0x7)
+#define __SPMEV_crm(p, n) ((((p) & 7) << 1) | (((n) >> 3) & 1))
+#define SYS_SPMEVCNTRn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b000, n), __SPMEV_op2(n))
+#define SYS_SPMEVFILT2Rn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b011, n), __SPMEV_op2(n))
+#define SYS_SPMEVFILTRn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b010, n), __SPMEV_op2(n))
+#define SYS_SPMEVTYPERn_EL0(n) sys_reg(2, 3, 14, __SPMEV_crm(0b001, n), __SPMEV_op2(n))
+
#define SYS_VPIDR_EL2 sys_reg(3, 4, 0, 0, 0)
#define SYS_VMPIDR_EL2 sys_reg(3, 4, 0, 0, 5)
@@ -518,7 +535,6 @@
#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
#define SYS_VNCR_EL2 sys_reg(3, 4, 2, 2, 0)
-#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
#define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0)
@@ -604,28 +620,18 @@
/* VHE encodings for architectural EL0/1 system registers */
#define SYS_BRBCR_EL12 sys_reg(2, 5, 9, 0, 0)
-#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
-#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2)
-#define SYS_SCTLR2_EL12 sys_reg(3, 5, 1, 0, 3)
-#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
-#define SYS_TRFCR_EL12 sys_reg(3, 5, 1, 2, 1)
-#define SYS_SMCR_EL12 sys_reg(3, 5, 1, 2, 6)
#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1)
-#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2)
-#define SYS_TCR2_EL12 sys_reg(3, 5, 2, 0, 3)
#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0)
#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1)
#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0)
#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1)
#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0)
#define SYS_TFSR_EL12 sys_reg(3, 5, 5, 6, 0)
-#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0)
#define SYS_PMSCR_EL12 sys_reg(3, 5, 9, 9, 0)
#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0)
#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0)
#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0)
-#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1)
#define SYS_SCXTNUM_EL12 sys_reg(3, 5, 13, 0, 7)
#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0)
#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0)
@@ -1028,8 +1034,11 @@
#define PIE_RX UL(0xa)
#define PIE_RW UL(0xc)
#define PIE_RWX UL(0xe)
+#define PIE_MASK UL(0xf)
-#define PIRx_ELx_PERM(idx, perm) ((perm) << ((idx) * 4))
+#define PIRx_ELx_BITS_PER_IDX 4
+#define PIRx_ELx_PERM_SHIFT(idx) ((idx) * PIRx_ELx_BITS_PER_IDX)
+#define PIRx_ELx_PERM_PREP(idx, perm) (((perm) & PIE_MASK) << PIRx_ELx_PERM_SHIFT(idx))
/*
* Permission Overlay Extension (POE) permission encodings.
@@ -1040,12 +1049,34 @@
#define POE_RX UL(0x3)
#define POE_W UL(0x4)
#define POE_RW UL(0x5)
-#define POE_XW UL(0x6)
-#define POE_RXW UL(0x7)
+#define POE_WX UL(0x6)
+#define POE_RWX UL(0x7)
#define POE_MASK UL(0xf)
-/* Initial value for Permission Overlay Extension for EL0 */
-#define POR_EL0_INIT POE_RXW
+#define POR_ELx_BITS_PER_IDX 4
+#define POR_ELx_PERM_SHIFT(idx) ((idx) * POR_ELx_BITS_PER_IDX)
+#define POR_ELx_PERM_GET(idx, reg) (((reg) >> POR_ELx_PERM_SHIFT(idx)) & POE_MASK)
+#define POR_ELx_PERM_PREP(idx, perm) (((perm) & POE_MASK) << POR_ELx_PERM_SHIFT(idx))
+
+/*
+ * Definitions for Guarded Control Stack
+ */
+
+#define GCS_CAP_ADDR_MASK GENMASK(63, 12)
+#define GCS_CAP_ADDR_SHIFT 12
+#define GCS_CAP_ADDR_WIDTH 52
+#define GCS_CAP_ADDR(x) FIELD_GET(GCS_CAP_ADDR_MASK, x)
+
+#define GCS_CAP_TOKEN_MASK GENMASK(11, 0)
+#define GCS_CAP_TOKEN_SHIFT 0
+#define GCS_CAP_TOKEN_WIDTH 12
+#define GCS_CAP_TOKEN(x) FIELD_GET(GCS_CAP_TOKEN_MASK, x)
+
+#define GCS_CAP_VALID_TOKEN 0x1
+#define GCS_CAP_IN_PROGRESS_TOKEN 0x5
+
+#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \
+ GCS_CAP_VALID_TOKEN)
#define ARM64_FEATURE_FIELD_BITS 4
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 6d44f8c8a18f..af9d9acaf997 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -43,9 +43,6 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_DIRTY_LOG_PAGE_OFFSET 64
-#define KVM_REG_SIZE(id) \
- (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
-
struct kvm_regs {
struct user_pt_regs regs; /* sp = sp_el0 */
@@ -108,6 +105,7 @@ struct kvm_regs {
#define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */
#define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */
#define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */
+#define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */
struct kvm_vcpu_init {
__u32 target;
@@ -418,6 +416,7 @@ enum {
#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
+#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h
index 9306726337fe..df36f23876e8 100644
--- a/tools/arch/arm64/include/uapi/asm/unistd.h
+++ b/tools/arch/arm64/include/uapi/asm/unistd.h
@@ -1,24 +1,2 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#define __ARCH_WANT_RENAMEAT
-#define __ARCH_WANT_NEW_STAT
-#define __ARCH_WANT_SET_GET_RLIMIT
-#define __ARCH_WANT_TIME32_SYSCALLS
-#define __ARCH_WANT_MEMFD_SECRET
-
-#include <asm-generic/unistd.h>
+#include <asm/unistd_64.h>
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 9e3fa7942e7d..51b383bea007 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -75,8 +75,8 @@
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */
#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */
#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */
-#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */
-#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */
+/* Free ( 3*32+ 6) */
+/* Free ( 3*32+ 7) */
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */
#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */
#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */
@@ -329,12 +329,14 @@
#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */
+#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */
#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */
#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */
#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */
+#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/
#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */
#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */
@@ -377,6 +379,7 @@
#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */
#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */
#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */
+#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/
@@ -434,15 +437,18 @@
#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */
/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */
-#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */
-#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */
+#define X86_FEATURE_SME (19*32+ 0) /* "sme" Secure Memory Encryption */
+#define X86_FEATURE_SEV (19*32+ 1) /* "sev" Secure Encrypted Virtualization */
#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */
-#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */
-#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */
+#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */
-#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */
-#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */
+#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */
+#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
+#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */
+#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */
#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
+#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
@@ -455,6 +461,11 @@
#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */
+#define X86_FEATURE_SRSO_USER_KERNEL_NO (20*32+30) /* CPU is not affected by SRSO across user/kernel boundaries */
+#define X86_FEATURE_SRSO_BP_SPEC_REDUCE (20*32+31) /*
+ * BP_CFG[BpSpecReduce] can be used to mitigate SRSO for VMs.
+ * (SRSO_MSR_FIX in the official doc).
+ */
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
@@ -470,6 +481,7 @@
#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */
#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */
+#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */
/*
* BUG word(s)
@@ -521,4 +533,5 @@
#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */
#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */
+#define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index dc1c1057f26e..e6134ef2263d 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -397,7 +397,8 @@
#define MSR_IA32_PASID_VALID BIT_ULL(31)
/* DEBUGCTLMSR bits (others vary by model): */
-#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */
+#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT)
#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2)
@@ -610,6 +611,7 @@
#define MSR_AMD_PERF_CTL 0xc0010062
#define MSR_AMD_PERF_STATUS 0xc0010063
#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
+#define MSR_AMD64_GUEST_TSC_FREQ 0xc0010134
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
@@ -646,6 +648,7 @@
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b
#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
+#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
@@ -684,11 +687,12 @@
#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
#define MSR_AMD64_SNP_RESV_BIT 18
#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
-
-#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
-
#define MSR_AMD64_RMP_BASE 0xc0010132
#define MSR_AMD64_RMP_END 0xc0010133
+#define MSR_AMD64_RMP_CFG 0xc0010136
+#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0
+#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT)
+#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8)
#define MSR_SVSM_CAA 0xc001f000
@@ -699,15 +703,17 @@
#define MSR_AMD_CPPC_REQ 0xc00102b3
#define MSR_AMD_CPPC_STATUS 0xc00102b4
-#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff)
-#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff)
-#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff)
-#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff)
+/* Masks for use with MSR_AMD_CPPC_CAP1 */
+#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0)
+#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8)
+#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16)
+#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24)
-#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0)
-#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8)
-#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
-#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
+/* Masks for use with MSR_AMD_CPPC_REQ */
+#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0)
+#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8)
+#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16)
+#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24)
/* AMD Performance Counter Global Status and Control MSRs */
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
@@ -719,6 +725,7 @@
/* Zen4 */
#define MSR_ZEN4_BP_CFG 0xc001102e
+#define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4
#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
/* Fam 19h MSRs */
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 88585c1de416..b663d916f162 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -559,6 +559,9 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8)
+#define KVM_XEN_MSR_MIN_INDEX 0x40000000u
+#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu
+
struct kvm_xen_hvm_config {
__u32 flags;
__u32 msr;
@@ -841,6 +844,7 @@ struct kvm_sev_snp_launch_start {
};
/* Kept in sync with firmware values for simplicity. */
+#define KVM_SEV_PAGE_TYPE_INVALID 0x0
#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1
#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3
#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4
@@ -925,5 +929,6 @@ struct kvm_hyperv_eventfd {
#define KVM_X86_SEV_VM 2
#define KVM_X86_SEV_ES_VM 3
#define KVM_X86_SNP_VM 4
+#define KVM_X86_TDX_VM 5
#endif /* _ASM_X86_KVM_H */
diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h
index 1814b413fd57..ec1321248dac 100644
--- a/tools/arch/x86/include/uapi/asm/svm.h
+++ b/tools/arch/x86/include/uapi/asm/svm.h
@@ -95,6 +95,7 @@
#define SVM_EXIT_CR14_WRITE_TRAP 0x09e
#define SVM_EXIT_CR15_WRITE_TRAP 0x09f
#define SVM_EXIT_INVPCID 0x0a2
+#define SVM_EXIT_IDLE_HLT 0x0a6
#define SVM_EXIT_NPF 0x400
#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
@@ -224,6 +225,7 @@
{ SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \
{ SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \
{ SVM_EXIT_INVPCID, "invpcid" }, \
+ { SVM_EXIT_IDLE_HLT, "idle-halt" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index 0199d56cb479..d66b710d628f 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -3,6 +3,7 @@
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
@@ -28,7 +29,7 @@
* only for the return value that is the same as the source input,
* which the compiler could/should do much better anyway.
*/
-SYM_FUNC_START(__memset)
+SYM_TYPED_FUNC_START(__memset)
ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS
movq %rdi,%r9
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index caedb3ef6688..f5dd84eb55dc 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -996,8 +996,8 @@ AVXcode: 4
83: Grp1 Ev,Ib (1A),(es)
# CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL,
# CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ
-84: CTESTSCC (ev)
-85: CTESTSCC (es) | CTESTSCC (66),(es)
+84: CTESTSCC Eb,Gb (ev)
+85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es)
88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es)
8f: POP2 Bq,Rq (000),(11B),(ev)
a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es)
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 04ba035d67e9..b9ce3aab15fe 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -24,6 +24,7 @@
#include <sys/poll.h>
#include <sys/utsname.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -677,6 +678,88 @@ static void kvp_process_ipconfig_file(char *cmd,
pclose(file);
}
+static bool kvp_verify_ip_address(const void *address_string)
+{
+ char verify_buf[sizeof(struct in6_addr)];
+
+ if (inet_pton(AF_INET, address_string, verify_buf) == 1)
+ return true;
+ if (inet_pton(AF_INET6, address_string, verify_buf) == 1)
+ return true;
+ return false;
+}
+
+static void kvp_extract_routes(const char *line, void **output, size_t *remaining)
+{
+ static const char needle[] = "via ";
+ const char *match, *haystack = line;
+
+ while ((match = strstr(haystack, needle))) {
+ const char *address, *next_char;
+
+ /* Address starts after needle. */
+ address = match + strlen(needle);
+
+ /* The char following address is a space or end of line. */
+ next_char = strpbrk(address, " \t\\");
+ if (!next_char)
+ next_char = address + strlen(address) + 1;
+
+ /* Enough room for address and semicolon. */
+ if (*remaining >= (next_char - address) + 1) {
+ memcpy(*output, address, next_char - address);
+ /* Terminate string for verification. */
+ memcpy(*output + (next_char - address), "", 1);
+ if (kvp_verify_ip_address(*output)) {
+ /* Advance output buffer. */
+ *output += next_char - address;
+ *remaining -= next_char - address;
+
+ /* Each address needs a trailing semicolon. */
+ memcpy(*output, ";", 1);
+ *output += 1;
+ *remaining -= 1;
+ }
+ }
+ haystack = next_char;
+ }
+}
+
+static void kvp_get_gateway(void *buffer, size_t buffer_len)
+{
+ static const char needle[] = "default ";
+ FILE *f;
+ void *output = buffer;
+ char *line = NULL;
+ size_t alloc_size = 0, remaining = buffer_len - 1;
+ ssize_t num_chars;
+
+ /* Show route information in a single line, for each address family */
+ f = popen("ip --oneline -4 route show;ip --oneline -6 route show", "r");
+ if (!f) {
+ /* Convert buffer into C-String. */
+ memcpy(output, "", 1);
+ return;
+ }
+ while ((num_chars = getline(&line, &alloc_size, f)) > 0) {
+ /* Skip short lines. */
+ if (num_chars <= strlen(needle))
+ continue;
+ /* Skip lines without default route. */
+ if (memcmp(line, needle, strlen(needle)))
+ continue;
+ /* Remove trailing newline to simplify further parsing. */
+ if (line[num_chars - 1] == '\n')
+ line[num_chars - 1] = '\0';
+ /* Search routes after match. */
+ kvp_extract_routes(line + strlen(needle), &output, &remaining);
+ }
+ /* Convert buffer into C-String. */
+ memcpy(output, "", 1);
+ free(line);
+ pclose(f);
+}
+
static void kvp_get_ipconfig_info(char *if_name,
struct hv_kvp_ipaddr_value *buffer)
{
@@ -685,30 +768,7 @@ static void kvp_get_ipconfig_info(char *if_name,
char *p;
FILE *file;
- /*
- * Get the address of default gateway (ipv4).
- */
- sprintf(cmd, "%s %s", "ip route show dev", if_name);
- strcat(cmd, " | awk '/default/ {print $3 }'");
-
- /*
- * Execute the command to gather gateway info.
- */
- kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way,
- (MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0);
-
- /*
- * Get the address of default gateway (ipv6).
- */
- sprintf(cmd, "%s %s", "ip -f inet6 route show dev", if_name);
- strcat(cmd, " | awk '/default/ {print $3 }'");
-
- /*
- * Execute the command to gather gateway info (ipv6).
- */
- kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way,
- (MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1);
-
+ kvp_get_gateway(buffer->gate_way, sizeof(buffer->gate_way));
/*
* Gather the DNS state.
diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h
new file mode 100644
index 000000000000..6b8713675765
--- /dev/null
+++ b/tools/include/linux/cfi_types.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Clang Control Flow Integrity (CFI) type definitions.
+ */
+#ifndef _LINUX_CFI_TYPES_H
+#define _LINUX_CFI_TYPES_H
+
+#ifdef __ASSEMBLY__
+#include <linux/linkage.h>
+
+#ifdef CONFIG_CFI_CLANG
+/*
+ * Use the __kcfi_typeid_<function> type identifier symbol to
+ * annotate indirectly called assembly functions. The compiler emits
+ * these symbols for all address-taken function declarations in C
+ * code.
+ */
+#ifndef __CFI_TYPE
+#define __CFI_TYPE(name) \
+ .4byte __kcfi_typeid_##name
+#endif
+
+#define SYM_TYPED_ENTRY(name, linkage, align...) \
+ linkage(name) ASM_NL \
+ align ASM_NL \
+ __CFI_TYPE(name) ASM_NL \
+ name:
+
+#define SYM_TYPED_START(name, linkage, align...) \
+ SYM_TYPED_ENTRY(name, linkage, align)
+
+#else /* CONFIG_CFI_CLANG */
+
+#define SYM_TYPED_START(name, linkage, align...) \
+ SYM_START(name, linkage, align)
+
+#endif /* CONFIG_CFI_CLANG */
+
+#ifndef SYM_TYPED_FUNC_START
+#define SYM_TYPED_FUNC_START(name) \
+ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* _LINUX_CFI_TYPES_H */
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
index 1ea2c4c33b86..ef1c27fa3c57 100644
--- a/tools/include/uapi/asm-generic/mman-common.h
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -85,6 +85,7 @@
/* compatibility flags */
#define MAP_FILE 0
+#define PKEY_UNRESTRICTED 0x0
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 88dc393c2bca..2892a45023af 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -849,9 +849,11 @@ __SYSCALL(__NR_getxattrat, sys_getxattrat)
__SYSCALL(__NR_listxattrat, sys_listxattrat)
#define __NR_removexattrat 466
__SYSCALL(__NR_removexattrat, sys_removexattrat)
+#define __NR_open_tree_attr 467
+__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
#undef __NR_syscalls
-#define __NR_syscalls 467
+#define __NR_syscalls 468
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 28705ae67784..fd404729b115 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4968,6 +4968,9 @@ union bpf_attr {
* the netns switch takes place from ingress to ingress without
* going through the CPU's backlog queue.
*
+ * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during
+ * the netns switch.
+ *
* The *flags* argument is reserved and must be 0. The helper is
* currently only supported for tc BPF program types at the
* ingress hook and for veth and netkit target device types. The
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 5d32d53508d9..ced0fc3c3aa5 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -79,6 +79,8 @@ enum {
#define IPPROTO_MPLS IPPROTO_MPLS
IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */
#define IPPROTO_ETHERNET IPPROTO_ETHERNET
+ IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */
+#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
IPPROTO_SMC = 256, /* Shared Memory Communications */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 502ea63b5d2e..b6ae8ad8934b 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -617,10 +617,6 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
-#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
- KVM_X86_DISABLE_EXITS_HLT | \
- KVM_X86_DISABLE_EXITS_PAUSE | \
- KVM_X86_DISABLE_EXITS_CSTATE)
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {
@@ -933,6 +929,7 @@ struct kvm_enable_cap {
#define KVM_CAP_PRE_FAULT_MEMORY 236
#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
#define KVM_CAP_X86_GUEST_MODE 238
+#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239
struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1070,6 +1067,10 @@ struct kvm_dirty_tlb {
#define KVM_REG_SIZE_SHIFT 52
#define KVM_REG_SIZE_MASK 0x00f0000000000000ULL
+
+#define KVM_REG_SIZE(id) \
+ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
#define KVM_REG_SIZE_U8 0x0000000000000000ULL
#define KVM_REG_SIZE_U16 0x0010000000000000ULL
#define KVM_REG_SIZE_U32 0x0020000000000000ULL
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 0524d541d4e3..5fc753c23734 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -385,6 +385,8 @@ enum perf_event_read_format {
*
* @sample_max_stack: Max number of frame pointers in a callchain,
* should be < /proc/sys/kernel/perf_event_max_stack
+ * Max number of entries of branch stack
+ * should be < hardware limit
*/
struct perf_event_attr {
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 887a25286441..f78ee3670dd5 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -98,43 +98,93 @@ struct statx_timestamp {
*/
struct statx {
/* 0x00 */
- __u32 stx_mask; /* What results were written [uncond] */
- __u32 stx_blksize; /* Preferred general I/O size [uncond] */
- __u64 stx_attributes; /* Flags conveying information about the file [uncond] */
+ /* What results were written [uncond] */
+ __u32 stx_mask;
+
+ /* Preferred general I/O size [uncond] */
+ __u32 stx_blksize;
+
+ /* Flags conveying information about the file [uncond] */
+ __u64 stx_attributes;
+
/* 0x10 */
- __u32 stx_nlink; /* Number of hard links */
- __u32 stx_uid; /* User ID of owner */
- __u32 stx_gid; /* Group ID of owner */
- __u16 stx_mode; /* File mode */
+ /* Number of hard links */
+ __u32 stx_nlink;
+
+ /* User ID of owner */
+ __u32 stx_uid;
+
+ /* Group ID of owner */
+ __u32 stx_gid;
+
+ /* File mode */
+ __u16 stx_mode;
__u16 __spare0[1];
+
/* 0x20 */
- __u64 stx_ino; /* Inode number */
- __u64 stx_size; /* File size */
- __u64 stx_blocks; /* Number of 512-byte blocks allocated */
- __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
+ /* Inode number */
+ __u64 stx_ino;
+
+ /* File size */
+ __u64 stx_size;
+
+ /* Number of 512-byte blocks allocated */
+ __u64 stx_blocks;
+
+ /* Mask to show what's supported in stx_attributes */
+ __u64 stx_attributes_mask;
+
/* 0x40 */
- struct statx_timestamp stx_atime; /* Last access time */
- struct statx_timestamp stx_btime; /* File creation time */
- struct statx_timestamp stx_ctime; /* Last attribute change time */
- struct statx_timestamp stx_mtime; /* Last data modification time */
+ /* Last access time */
+ struct statx_timestamp stx_atime;
+
+ /* File creation time */
+ struct statx_timestamp stx_btime;
+
+ /* Last attribute change time */
+ struct statx_timestamp stx_ctime;
+
+ /* Last data modification time */
+ struct statx_timestamp stx_mtime;
+
/* 0x80 */
- __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
+ /* Device ID of special file [if bdev/cdev] */
+ __u32 stx_rdev_major;
__u32 stx_rdev_minor;
- __u32 stx_dev_major; /* ID of device containing file [uncond] */
+
+ /* ID of device containing file [uncond] */
+ __u32 stx_dev_major;
__u32 stx_dev_minor;
+
/* 0x90 */
__u64 stx_mnt_id;
- __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
- __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
+
+ /* Memory buffer alignment for direct I/O */
+ __u32 stx_dio_mem_align;
+
+ /* File offset alignment for direct I/O */
+ __u32 stx_dio_offset_align;
+
/* 0xa0 */
- __u64 stx_subvol; /* Subvolume identifier */
- __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */
- __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */
+ /* Subvolume identifier */
+ __u64 stx_subvol;
+
+ /* Min atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_min;
+
+ /* Max atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max;
+
/* 0xb0 */
- __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */
- __u32 __spare1[1];
+ /* Max atomic write segment count */
+ __u32 stx_atomic_write_segments_max;
+
+ /* File offset alignment for direct I/O reads */
+ __u32 stx_dio_read_offset_align;
+
/* 0xb8 */
__u64 __spare3[9]; /* Spare space for future expansion */
+
/* 0x100 */
};
@@ -164,6 +214,7 @@ struct statx {
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */
+#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index ffcfd777c451..7fbb50b74c00 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -42,6 +42,7 @@ libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
TEST_ARGS := $(if $(V),-v)
INCLUDES = \
+-I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi \
-I$(srctree)/tools/lib/perf/include \
-I$(srctree)/tools/lib/ \
-I$(srctree)/tools/include \
@@ -99,7 +100,16 @@ $(LIBAPI)-clean:
$(call QUIET_CLEAN, libapi)
$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
-$(LIBPERF_IN): FORCE
+uapi-asm := $(OUTPUT)arch/$(SRCARCH)/include/generated/uapi/asm
+ifeq ($(SRCARCH),arm64)
+ syscall-y := $(uapi-asm)/unistd_64.h
+endif
+uapi-asm-generic:
+ $(if $(syscall-y),\
+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-headers obj=$(uapi-asm) \
+ generic=include/uapi/asm-generic $(syscall-y),)
+
+$(LIBPERF_IN): uapi-asm-generic FORCE
$(Q)$(MAKE) $(build)=libperf
$(LIBPERF_A): $(LIBPERF_IN)
@@ -120,7 +130,7 @@ all: fixdep
clean: $(LIBAPI)-clean
$(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \
*.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd tests/*.o LIBPERF-CFLAGS $(LIBPERF_PC) \
- $(TESTS_STATIC) $(TESTS_SHARED)
+ $(TESTS_STATIC) $(TESTS_SHARED) $(syscall-y)
TESTS_IN = tests-in.o
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index ce32cb35007d..c4da34048ef8 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -364,7 +364,7 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr)
"Invalid attribute (binary %s)", policy->name);
return -1;
case YNL_PT_NUL_STR:
- if ((!policy->len || len <= policy->len) && !data[len - 1])
+ if (len && (!policy->len || len <= policy->len) && !data[len - 1])
break;
yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
"Invalid attribute (string %s)", policy->name);
diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py
index af7fddd7b085..cab6b576c876 100755
--- a/tools/net/ynl/pyynl/ethtool.py
+++ b/tools/net/ynl/pyynl/ethtool.py
@@ -338,16 +338,24 @@ def main():
print('Capabilities:')
[print(f'\t{v}') for v in bits_to_dict(tsinfo['timestamping'])]
- print(f'PTP Hardware Clock: {tsinfo["phc-index"]}')
+ print(f'PTP Hardware Clock: {tsinfo.get("phc-index", "none")}')
- print('Hardware Transmit Timestamp Modes:')
- [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])]
+ if 'tx-types' in tsinfo:
+ print('Hardware Transmit Timestamp Modes:')
+ [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])]
+ else:
+ print('Hardware Transmit Timestamp Modes: none')
+
+ if 'rx-filters' in tsinfo:
+ print('Hardware Receive Filter Modes:')
+ [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])]
+ else:
+ print('Hardware Receive Filter Modes: none')
- print('Hardware Receive Filter Modes:')
- [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])]
+ if 'stats' in tsinfo and tsinfo['stats']:
+ print('Statistics:')
+ [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()]
- print('Statistics:')
- [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()]
return
print(f'Settings for {args.device}:')
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index a1427c537030..a7f08edbc235 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -162,9 +162,15 @@ class Type(SpecAttr):
def free_needs_iter(self):
return False
- def free(self, ri, var, ref):
+ def _free_lines(self, ri, var, ref):
if self.is_multi_val() or self.presence_type() == 'len':
- ri.cw.p(f'free({var}->{ref}{self.c_name});')
+ return [f'free({var}->{ref}{self.c_name});']
+ return []
+
+ def free(self, ri, var, ref):
+ lines = self._free_lines(ri, var, ref)
+ for line in lines:
+ ri.cw.p(line)
def arg_member(self, ri):
member = self._complex_member_type(ri)
@@ -263,6 +269,10 @@ class Type(SpecAttr):
var = "req"
member = f"{var}->{'.'.join(ref)}"
+ local_vars = []
+ if self.free_needs_iter():
+ local_vars += ['unsigned int i;']
+
code = []
presence = ''
for i in range(0, len(ref)):
@@ -272,6 +282,10 @@ class Type(SpecAttr):
if i == len(ref) - 1 and self.presence_type() != 'bit':
continue
code.append(presence + ' = 1;')
+ ref_path = '.'.join(ref[:-1])
+ if ref_path:
+ ref_path += '.'
+ code += self._free_lines(ri, var, ref_path)
code += self._setter_lines(ri, member, presence)
func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}"
@@ -279,7 +293,8 @@ class Type(SpecAttr):
alloc = bool([x for x in code if 'alloc(' in x])
if free and not alloc:
func_name = '__' + func_name
- ri.cw.write_func('static inline void', func_name, body=code,
+ ri.cw.write_func('static inline void', func_name, local_vars=local_vars,
+ body=code,
args=[f'{type_name(ri, direction, deref=deref)} *{var}'] + self.arg_member(ri))
@@ -482,8 +497,7 @@ class TypeString(Type):
['unsigned int len;']
def _setter_lines(self, ri, member, presence):
- return [f"free({member});",
- f"{presence}_len = strlen({self.c_name});",
+ return [f"{presence}_len = strlen({self.c_name});",
f"{member} = malloc({presence}_len + 1);",
f'memcpy({member}, {self.c_name}, {presence}_len);',
f'{member}[{presence}_len] = 0;']
@@ -536,8 +550,7 @@ class TypeBinary(Type):
['unsigned int len;']
def _setter_lines(self, ri, member, presence):
- return [f"free({member});",
- f"{presence}_len = len;",
+ return [f"{presence}_len = len;",
f"{member} = malloc({presence}_len);",
f'memcpy({member}, {self.c_name}, {presence}_len);']
@@ -574,12 +587,14 @@ class TypeNest(Type):
def _complex_member_type(self, ri):
return self.nested_struct_type
- def free(self, ri, var, ref):
+ def _free_lines(self, ri, var, ref):
+ lines = []
at = '&'
if self.is_recursive_for_op(ri):
at = ''
- ri.cw.p(f'if ({var}->{ref}{self.c_name})')
- ri.cw.p(f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});')
+ lines += [f'if ({var}->{ref}{self.c_name})']
+ lines += [f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});']
+ return lines
def _attr_typol(self):
return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
@@ -632,15 +647,19 @@ class TypeMultiAttr(Type):
def free_needs_iter(self):
return 'type' not in self.attr or self.attr['type'] == 'nest'
- def free(self, ri, var, ref):
+ def _free_lines(self, ri, var, ref):
+ lines = []
if self.attr['type'] in scalars:
- ri.cw.p(f"free({var}->{ref}{self.c_name});")
+ lines += [f"free({var}->{ref}{self.c_name});"]
elif 'type' not in self.attr or self.attr['type'] == 'nest':
- ri.cw.p(f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)")
- ri.cw.p(f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);')
- ri.cw.p(f"free({var}->{ref}{self.c_name});")
+ lines += [
+ f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)",
+ f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);',
+ f"free({var}->{ref}{self.c_name});",
+ ]
else:
raise Exception(f"Free of MultiAttr sub-type {self.attr['type']} not supported yet")
+ return lines
def _attr_policy(self, policy):
return self.base_type._attr_policy(policy)
@@ -654,10 +673,10 @@ class TypeMultiAttr(Type):
def attr_put(self, ri, var):
if self.attr['type'] in scalars:
put_type = self.type
- ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)")
+ ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)")
ri.cw.p(f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);")
elif 'type' not in self.attr or self.attr['type'] == 'nest':
- ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)")
+ ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)")
self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " +
f"{self.enum_name}, &{var}->{self.c_name}[i])")
else:
@@ -666,8 +685,7 @@ class TypeMultiAttr(Type):
def _setter_lines(self, ri, member, presence):
# For multi-attr we have a count, not presence, hack up the presence
presence = presence[:-(len('_present.') + len(self.c_name))] + "n_" + self.c_name
- return [f"free({member});",
- f"{member} = {self.c_name};",
+ return [f"{member} = {self.c_name};",
f"{presence} = n_{self.c_name};"]
@@ -696,8 +714,11 @@ class TypeArrayNest(Type):
def _attr_get(self, ri, var):
local_vars = ['const struct nlattr *attr2;']
get_lines = [f'attr_{self.c_name} = attr;',
- 'ynl_attr_for_each_nested(attr2, attr)',
- f'\t{var}->n_{self.c_name}++;']
+ 'ynl_attr_for_each_nested(attr2, attr) {',
+ '\tif (ynl_attr_validate(yarg, attr2))',
+ '\t\treturn YNL_PARSE_CB_ERROR;',
+ f'\t{var}->n_{self.c_name}++;',
+ '}']
return get_lines, None, local_vars
@@ -755,6 +776,7 @@ class Struct:
self.request = False
self.reply = False
self.recursive = False
+ self.in_multi_val = False # used by a MultiAttr or and legacy arrays
self.attr_list = []
self.attrs = dict()
@@ -1121,6 +1143,9 @@ class Family(SpecFamily):
self.pure_nested_structs[nested].request = True
if attr in rs_members['reply']:
self.pure_nested_structs[nested].reply = True
+ if spec.is_multi_val():
+ child = self.pure_nested_structs.get(nested)
+ child.in_multi_val = True
self._sort_pure_types()
@@ -1136,6 +1161,8 @@ class Family(SpecFamily):
struct.child_nests.update(child.child_nests)
child.request |= struct.request
child.reply |= struct.reply
+ if spec.is_multi_val():
+ child.in_multi_val = True
if attr_set in struct.child_nests:
struct.recursive = True
@@ -1399,9 +1426,9 @@ class CodeWriter:
def write_func(self, qual_ret, name, body, args=None, local_vars=None):
self.write_func_prot(qual_ret=qual_ret, name=name, args=args)
+ self.block_start()
self.write_func_lvar(local_vars=local_vars)
- self.block_start()
for line in body:
self.p(line)
self.block_end()
@@ -1644,11 +1671,23 @@ def put_req_nested_prototype(ri, struct, suffix=';'):
def put_req_nested(ri, struct):
+ local_vars = []
+ init_lines = []
+
+ local_vars.append('struct nlattr *nest;')
+ init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);")
+
+ for _, arg in struct.member_list():
+ if arg.presence_type() == 'count':
+ local_vars.append('unsigned int i;')
+ break
+
put_req_nested_prototype(ri, struct, suffix='')
ri.cw.block_start()
- ri.cw.write_func_lvar('struct nlattr *nest;')
+ ri.cw.write_func_lvar(local_vars)
- ri.cw.p("nest = ynl_attr_nest_start(nlh, attr_type);")
+ for line in init_lines:
+ ri.cw.p(line)
for _, arg in struct.member_list():
arg.attr_put(ri, "obj")
@@ -1850,6 +1889,11 @@ def print_req(ri):
local_vars += ['size_t hdr_len;',
'void *hdr;']
+ for _, attr in ri.struct["request"].member_list():
+ if attr.presence_type() == 'count':
+ local_vars += ['unsigned int i;']
+ break
+
print_prototype(ri, direction, terminate=False)
ri.cw.block_start()
ri.cw.write_func_lvar(local_vars)
@@ -2941,6 +2985,9 @@ def main():
for attr_set, struct in parsed.pure_nested_structs.items():
ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set)
print_type_full(ri, struct)
+ if struct.request and struct.in_multi_val:
+ free_rsp_nested_prototype(ri)
+ cw.nl()
for op_name, op in parsed.ops.items():
cw.p(f"/* ============== {op.enum_name} ============== */")
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 3ce7b54003c2..687c5eafb49a 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -189,6 +189,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
op2 = ins.opcode.bytes[1];
op3 = ins.opcode.bytes[2];
+ /*
+ * XXX hack, decoder is buggered and thinks 0xea is 7 bytes long.
+ */
+ if (op1 == 0xea) {
+ insn->len = 1;
+ insn->type = INSN_BUG;
+ return 0;
+ }
+
if (ins.rex_prefix.nbytes) {
rex = ins.rex_prefix.bytes[0];
rex_w = X86_REX_W(rex) >> 3;
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index b649049b6a11..b21b12ec88d9 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -225,7 +225,9 @@ static bool is_rust_noreturn(const struct symbol *func)
str_ends_with(func->name, "_4core9panicking14panic_nounwind") ||
str_ends_with(func->name, "_4core9panicking18panic_bounds_check") ||
str_ends_with(func->name, "_4core9panicking19assert_failed_inner") ||
+ str_ends_with(func->name, "_4core9panicking30panic_null_pointer_dereference") ||
str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") ||
+ str_ends_with(func->name, "_7___rustc17rust_begin_unwind") ||
strstr(func->name, "_4core9panicking13assert_failed") ||
strstr(func->name, "_4core9panicking11panic_const24panic_const_") ||
(strstr(func->name, "_4core5slice5index24slice_") &&
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index eea95c6c0c71..b7769a22fe1a 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -29,6 +29,7 @@ include $(srctree)/tools/scripts/Makefile.arch
$(call detected_var,SRCARCH)
CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated
+CFLAGS += -I$(OUTPUT)libperf/arch/$(SRCARCH)/include/generated/uapi
# Additional ARCH settings for ppc
ifeq ($(SRCARCH),powerpc)
diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl
index 49eeb2ad8dbd..27c1d5ebcd91 100644
--- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl
@@ -481,3 +481,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index c844cd5cda62..1e8c44c7b614 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -381,3 +381,4 @@
464 n64 getxattrat sys_getxattrat
465 n64 listxattrat sys_listxattrat
466 n64 removexattrat sys_removexattrat
+467 n64 open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index d8b4ab78bef0..9a084bdb8926 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -557,3 +557,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index e9115b4d8b63..a4569b96ef06 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -469,3 +469,4 @@
464 common getxattrat sys_getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl
index c8cad33bf250..52a7652fcff6 100644
--- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl
@@ -470,3 +470,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
index 727f99d333b3..83e45eb6c095 100644
--- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
@@ -512,3 +512,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
index 4d0fb2fba7e2..ac007ea00979 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
@@ -396,7 +396,7 @@
381 i386 pkey_alloc sys_pkey_alloc
382 i386 pkey_free sys_pkey_free
383 i386 statx sys_statx
-384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl
+384 i386 arch_prctl sys_arch_prctl
385 i386 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents
386 i386 rseq sys_rseq
393 i386 semget sys_semget
@@ -472,3 +472,4 @@
464 i386 getxattrat sys_getxattrat
465 i386 listxattrat sys_listxattrat
466 i386 removexattrat sys_removexattrat
+467 i386 open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 5eb708bff1c7..cfb5ca41e30d 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -390,6 +390,7 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
index 37effc1b134e..f657a77314f8 100644
--- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
@@ -437,3 +437,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index a4499e5a6f9c..857f6646cc23 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -20,6 +20,7 @@ FILES=(
"include/uapi/linux/stat.h"
"include/linux/bits.h"
"include/vdso/bits.h"
+ "include/linux/cfi_types.h"
"include/linux/const.h"
"include/vdso/const.h"
"include/vdso/unaligned.h"
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index d18cc47e89bd..c3322eb3d686 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -392,6 +392,8 @@ struct ucred {
extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
+extern int put_cmsg_notrunc(struct msghdr *msg, int level, int type, int len,
+ void *data);
struct timespec64;
struct __kernel_timespec;
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
index 6e6907e63bfc..a15ac2fa4b20 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
@@ -155,4 +155,8 @@
#define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */
#define AT_HANDLE_CONNECTABLE 0x002 /* Request a connectable file handle */
+/* Flags for execveat2(2). */
+#define AT_EXECVE_CHECK 0x10000 /* Only perform a check if execution
+ would be allowed. */
+
#endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h
index 753971770733..e762e1af650c 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fs.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h
@@ -40,6 +40,15 @@
#define BLOCK_SIZE_BITS 10
#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+/* flags for integrity meta */
+#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */
+#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */
+#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */
+
+#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \
+ IO_INTEGRITY_CHK_REFTAG | \
+ IO_INTEGRITY_CHK_APPTAG)
+
#define SEEK_SET 0 /* seek relative to beginning of file */
#define SEEK_CUR 1 /* seek relative to current file position */
#define SEEK_END 2 /* seek relative to end of file */
@@ -203,10 +212,8 @@ struct fsxattr {
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
-/*
- * A jump here: 130-136 are reserved for zoned block devices
- * (see uapi/linux/blkzoned.h)
- */
+/* 130-136 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */
+/* 137-141 are used by blk-crypto ioctls (uapi/linux/blk-crypto.h) */
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */
@@ -332,9 +339,13 @@ typedef int __bitwise __kernel_rwf_t;
/* Atomic Write */
#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040)
+/* buffered IO that drops the cache after reading or writing data */
+#define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080)
+
/* mask of flags supported by the kernel */
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
- RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC)
+ RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
+ RWF_DONTCACHE)
#define PROCFS_IOCTL_MAGIC 'f'
diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h
index c07008816aca..7fa67c2031a5 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/mount.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h
@@ -179,7 +179,12 @@ struct statmount {
__u32 opt_array; /* [str] Array of nul terminated fs options */
__u32 opt_sec_num; /* Number of security options */
__u32 opt_sec_array; /* [str] Array of nul terminated security options */
- __u64 __spare2[46];
+ __u64 supported_mask; /* Mask flags that this kernel supports */
+ __u32 mnt_uidmap_num; /* Number of uid mappings */
+ __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */
+ __u32 mnt_gidmap_num; /* Number of gid mappings */
+ __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */
+ __u64 __spare2[43];
char str[]; /* Variable size part containing strings */
};
@@ -217,6 +222,9 @@ struct mnt_id_req {
#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */
#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */
#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */
+#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */
+#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */
+#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */
/*
* Special @mnt_id values that can be passed to listmount
diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
index 5c6080680cb2..15c18ef4eb11 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
@@ -353,4 +353,15 @@ struct prctl_mm_map {
*/
#define PR_LOCK_SHADOW_STACK_STATUS 76
+/*
+ * Controls the mode of timer_create() for CRIU restore operations.
+ * Enabling this allows CRIU to restore timers with explicit IDs.
+ *
+ * Don't use for normal operations as the result might be undefined.
+ */
+#define PR_TIMER_CREATE_RESTORE_IDS 77
+# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0
+# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
+# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
+
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h
index 887a25286441..f78ee3670dd5 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/stat.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h
@@ -98,43 +98,93 @@ struct statx_timestamp {
*/
struct statx {
/* 0x00 */
- __u32 stx_mask; /* What results were written [uncond] */
- __u32 stx_blksize; /* Preferred general I/O size [uncond] */
- __u64 stx_attributes; /* Flags conveying information about the file [uncond] */
+ /* What results were written [uncond] */
+ __u32 stx_mask;
+
+ /* Preferred general I/O size [uncond] */
+ __u32 stx_blksize;
+
+ /* Flags conveying information about the file [uncond] */
+ __u64 stx_attributes;
+
/* 0x10 */
- __u32 stx_nlink; /* Number of hard links */
- __u32 stx_uid; /* User ID of owner */
- __u32 stx_gid; /* Group ID of owner */
- __u16 stx_mode; /* File mode */
+ /* Number of hard links */
+ __u32 stx_nlink;
+
+ /* User ID of owner */
+ __u32 stx_uid;
+
+ /* Group ID of owner */
+ __u32 stx_gid;
+
+ /* File mode */
+ __u16 stx_mode;
__u16 __spare0[1];
+
/* 0x20 */
- __u64 stx_ino; /* Inode number */
- __u64 stx_size; /* File size */
- __u64 stx_blocks; /* Number of 512-byte blocks allocated */
- __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
+ /* Inode number */
+ __u64 stx_ino;
+
+ /* File size */
+ __u64 stx_size;
+
+ /* Number of 512-byte blocks allocated */
+ __u64 stx_blocks;
+
+ /* Mask to show what's supported in stx_attributes */
+ __u64 stx_attributes_mask;
+
/* 0x40 */
- struct statx_timestamp stx_atime; /* Last access time */
- struct statx_timestamp stx_btime; /* File creation time */
- struct statx_timestamp stx_ctime; /* Last attribute change time */
- struct statx_timestamp stx_mtime; /* Last data modification time */
+ /* Last access time */
+ struct statx_timestamp stx_atime;
+
+ /* File creation time */
+ struct statx_timestamp stx_btime;
+
+ /* Last attribute change time */
+ struct statx_timestamp stx_ctime;
+
+ /* Last data modification time */
+ struct statx_timestamp stx_mtime;
+
/* 0x80 */
- __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
+ /* Device ID of special file [if bdev/cdev] */
+ __u32 stx_rdev_major;
__u32 stx_rdev_minor;
- __u32 stx_dev_major; /* ID of device containing file [uncond] */
+
+ /* ID of device containing file [uncond] */
+ __u32 stx_dev_major;
__u32 stx_dev_minor;
+
/* 0x90 */
__u64 stx_mnt_id;
- __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
- __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
+
+ /* Memory buffer alignment for direct I/O */
+ __u32 stx_dio_mem_align;
+
+ /* File offset alignment for direct I/O */
+ __u32 stx_dio_offset_align;
+
/* 0xa0 */
- __u64 stx_subvol; /* Subvolume identifier */
- __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */
- __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */
+ /* Subvolume identifier */
+ __u64 stx_subvol;
+
+ /* Min atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_min;
+
+ /* Max atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max;
+
/* 0xb0 */
- __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */
- __u32 __spare1[1];
+ /* Max atomic write segment count */
+ __u32 stx_atomic_write_segments_max;
+
+ /* File offset alignment for direct I/O reads */
+ __u32 stx_dio_read_offset_align;
+
/* 0xb8 */
__u64 __spare3[9]; /* Spare space for future expansion */
+
/* 0x100 */
};
@@ -164,6 +214,7 @@ struct statx {
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */
+#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h
index 4cd513215bcd..5a049eeaecce 100644
--- a/tools/perf/trace/beauty/include/uapi/sound/asound.h
+++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h
@@ -716,7 +716,7 @@ enum {
* Raw MIDI section - /dev/snd/midi??
*/
-#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 4)
+#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 5)
enum {
SNDRV_RAWMIDI_STREAM_OUTPUT = 0,
@@ -728,6 +728,9 @@ enum {
#define SNDRV_RAWMIDI_INFO_INPUT 0x00000002
#define SNDRV_RAWMIDI_INFO_DUPLEX 0x00000004
#define SNDRV_RAWMIDI_INFO_UMP 0x00000008
+#define SNDRV_RAWMIDI_INFO_STREAM_INACTIVE 0x00000010
+
+#define SNDRV_RAWMIDI_DEVICE_UNKNOWN 0
struct snd_rawmidi_info {
unsigned int device; /* RO/WR (control): device number */
@@ -740,7 +743,8 @@ struct snd_rawmidi_info {
unsigned char subname[32]; /* name of active or selected subdevice */
unsigned int subdevices_count;
unsigned int subdevices_avail;
- unsigned char reserved[64]; /* reserved for future use */
+ int tied_device; /* R: tied rawmidi device (UMP/legacy) */
+ unsigned char reserved[60]; /* reserved for future use */
};
#define SNDRV_RAWMIDI_MODE_FRAMING_MASK (7<<0)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1974395492d7..3c030da2e477 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2566,25 +2566,6 @@ check:
return false;
}
-static bool evsel__handle_error_quirks(struct evsel *evsel, int error)
-{
- /*
- * AMD core PMU tries to forward events with precise_ip to IBS PMU
- * implicitly. But IBS PMU has more restrictions so it can fail with
- * supported event attributes. Let's forward it back to the core PMU
- * by clearing precise_ip only if it's from precise_max (:P).
- */
- if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() &&
- evsel->core.attr.precise_ip && evsel->precise_max) {
- evsel->core.attr.precise_ip = 0;
- pr_debug2_peo("removing precise_ip on AMD\n");
- display_attr(&evsel->core.attr);
- return true;
- }
-
- return false;
-}
-
static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads,
int start_cpu_map_idx, int end_cpu_map_idx)
@@ -2730,9 +2711,6 @@ try_fallback:
if (evsel__precise_ip_fallback(evsel))
goto retry_open;
- if (evsel__handle_error_quirks(evsel, err))
- goto retry_open;
-
out_close:
if (err)
threads->err_thread = thread;
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 9fb2c1343c7f..0b037e7389a0 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -371,7 +371,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
* has to be pointed by symsrc_filename
*/
if (ofs == 0) {
- if (dso__data_get_fd(dso, machine, &fd) {
+ if (dso__data_get_fd(dso, machine, &fd)) {
ofs = elf_section_offset(fd, ".debug_frame");
dso__data_put_fd(dso);
}
diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c
index 2c720e3ecad5..fdc7170639e6 100644
--- a/tools/sched_ext/scx_flatcg.bpf.c
+++ b/tools/sched_ext/scx_flatcg.bpf.c
@@ -950,5 +950,5 @@ SCX_OPS_DEFINE(flatcg_ops,
.cgroup_move = (void *)fcg_cgroup_move,
.init = (void *)fcg_init,
.exit = (void *)fcg_exit,
- .flags = SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING,
+ .flags = SCX_OPS_ENQ_EXITING,
.name = "flatcg");
diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl
index ebbdb3c42e9f..580b4e246aec 100644
--- a/tools/scripts/syscall.tbl
+++ b/tools/scripts/syscall.tbl
@@ -407,3 +407,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index f2957a3e36fe..bf9caa908f89 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -1780,7 +1780,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
- rc = devm_cxl_setup_fwctl(cxlmd);
+ rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd);
if (rc)
dev_dbg(dev, "No CXL FWCTL setup\n");
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index cdd9782f9646..422e186cf3cf 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -20,6 +20,7 @@ CONFIG_VFAT_FS=y
CONFIG_PCI=y
CONFIG_USB4=y
+CONFIG_I2C=y
CONFIG_NET=y
CONFIG_MCTP=y
@@ -43,6 +44,8 @@ CONFIG_REGMAP_BUILD=y
CONFIG_AUDIT=y
+CONFIG_PRIME_NUMBERS=y
+
CONFIG_SECURITY=y
CONFIG_SECURITY_APPARMOR=y
CONFIG_SECURITY_LANDLOCK=y
@@ -51,3 +54,4 @@ CONFIG_SOUND=y
CONFIG_SND=y
CONFIG_SND_SOC=y
CONFIG_SND_SOC_TOPOLOGY_BUILD=y
+CONFIG_SND_SOC_CS35L56_I2C=y
diff --git a/tools/testing/kunit/qemu_configs/sh.py b/tools/testing/kunit/qemu_configs/sh.py
index 78a474a5b95f..f00cb89fdef6 100644
--- a/tools/testing/kunit/qemu_configs/sh.py
+++ b/tools/testing/kunit/qemu_configs/sh.py
@@ -7,7 +7,9 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
CONFIG_MEMORY_START=0x0c000000
CONFIG_SH_RTS7751R2D=y
CONFIG_RTS7751R2D_PLUS=y
-CONFIG_SERIAL_SH_SCI=y''',
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_CMDLINE_EXTEND=y
+''',
qemu_arch='sh4',
kernel_path='arch/sh/boot/zImage',
kernel_command_line='console=ttySC1',
diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index 67503089e6a0..01e836fba488 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -2434,6 +2434,107 @@ static int memblock_overlaps_region_checks(void)
return 0;
}
+#ifdef CONFIG_NUMA
+static int memblock_set_node_check(void)
+{
+ unsigned long i, max_reserved;
+ struct memblock_region *rgn;
+ void *orig_region;
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_allow_resize();
+
+ dummy_physical_memory_init();
+ memblock_add(dummy_physical_memory_base(), MEM_SIZE);
+ orig_region = memblock.reserved.regions;
+
+ /* Equally Split range to node 0 and 1*/
+ memblock_set_node(memblock_start_of_DRAM(),
+ memblock_phys_mem_size() / 2, &memblock.memory, 0);
+ memblock_set_node(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2,
+ memblock_phys_mem_size() / 2, &memblock.memory, 1);
+
+ ASSERT_EQ(memblock.memory.cnt, 2);
+ rgn = &memblock.memory.regions[0];
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM());
+ ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2);
+ ASSERT_EQ(memblock_get_region_node(rgn), 0);
+ rgn = &memblock.memory.regions[1];
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM() + memblock_phys_mem_size() / 2);
+ ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2);
+ ASSERT_EQ(memblock_get_region_node(rgn), 1);
+
+ /* Reserve 126 regions with the last one across node boundary */
+ for (i = 0; i < 125; i++)
+ memblock_reserve(memblock_start_of_DRAM() + SZ_16 * i, SZ_8);
+
+ memblock_reserve(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2 - SZ_8,
+ SZ_16);
+
+ /*
+ * Commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()")
+ * do following process to set nid to each memblock.reserved region.
+ * But it may miss some region if memblock_set_node() double the
+ * array.
+ *
+ * By checking 'max', we make sure all region nid is set properly.
+ */
+repeat:
+ max_reserved = memblock.reserved.max;
+ for_each_mem_region(rgn) {
+ int nid = memblock_get_region_node(rgn);
+
+ memblock_set_node(rgn->base, rgn->size, &memblock.reserved, nid);
+ }
+ if (max_reserved != memblock.reserved.max)
+ goto repeat;
+
+ /* Confirm each region has valid node set */
+ for_each_reserved_mem_region(rgn) {
+ ASSERT_TRUE(numa_valid_node(memblock_get_region_node(rgn)));
+ if (rgn == (memblock.reserved.regions + memblock.reserved.cnt - 1))
+ ASSERT_EQ(1, memblock_get_region_node(rgn));
+ else
+ ASSERT_EQ(0, memblock_get_region_node(rgn));
+ }
+
+ dummy_physical_memory_cleanup();
+
+ /*
+ * The current reserved.regions is occupying a range of memory that
+ * allocated from dummy_physical_memory_init(). After free the memory,
+ * we must not use it. So restore the origin memory region to make sure
+ * the tests can run as normal and not affected by the double array.
+ */
+ memblock.reserved.regions = orig_region;
+ memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS;
+
+ test_pass_pop();
+
+ return 0;
+}
+
+static int memblock_set_node_checks(void)
+{
+ prefix_reset();
+ prefix_push("memblock_set_node");
+ test_print("Running memblock_set_node tests...\n");
+
+ memblock_set_node_check();
+
+ prefix_pop();
+
+ return 0;
+}
+#else
+static int memblock_set_node_checks(void)
+{
+ return 0;
+}
+#endif
+
int memblock_basic_checks(void)
{
memblock_initialization_check();
@@ -2444,6 +2545,7 @@ int memblock_basic_checks(void)
memblock_bottom_up_checks();
memblock_trim_memory_checks();
memblock_overlaps_region_checks();
+ memblock_set_node_checks();
return 0;
}
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index c77c8c8e3d9b..80fb84fa3cfc 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -121,6 +121,7 @@ TARGETS += user_events
TARGETS += vDSO
TARGETS += mm
TARGETS += x86
+TARGETS += x86/bugs
TARGETS += zram
#Please keep the TARGETS list alphabetically sorted
# Run "make quicktest=1 run_tests" or
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
index 09f6487f58b9..5fea3209566e 100644
--- a/tools/testing/selftests/bpf/prog_tests/for_each.c
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -6,6 +6,7 @@
#include "for_each_array_map_elem.skel.h"
#include "for_each_map_elem_write_key.skel.h"
#include "for_each_multi_maps.skel.h"
+#include "for_each_hash_modify.skel.h"
static unsigned int duration;
@@ -203,6 +204,40 @@ out:
for_each_multi_maps__destroy(skel);
}
+static void test_hash_modify(void)
+{
+ struct for_each_hash_modify *skel;
+ int max_entries, i, err;
+ __u64 key, val;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1
+ );
+
+ skel = for_each_hash_modify__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_hash_modify__open_and_load"))
+ return;
+
+ max_entries = bpf_map__max_entries(skel->maps.hashmap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i;
+ err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_OK(topts.retval, "retval");
+
+out:
+ for_each_hash_modify__destroy(skel);
+}
+
void test_for_each(void)
{
if (test__start_subtest("hash_map"))
@@ -213,4 +248,6 @@ void test_for_each(void)
test_write_map_key();
if (test__start_subtest("multi_maps"))
test_multi_maps();
+ if (test__start_subtest("hash_modify"))
+ test_hash_modify();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 2d0796314862..0a99fd404f6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -68,7 +68,6 @@ static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
goto close_cli;
err = disconnect(cli);
- ASSERT_OK(err, "disconnect");
close_cli:
close(cli);
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 13a2e22f5465..863df7c0fdd0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -221,7 +221,7 @@
#define CAN_USE_GOTOL
#endif
-#if _clang_major__ >= 18
+#if __clang_major__ >= 18
#define CAN_USE_BPF_ST
#endif
diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_modify.c b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c
new file mode 100644
index 000000000000..82307166f789
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Intel Corporation */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 128);
+ __type(key, __u64);
+ __type(value, __u64);
+} hashmap SEC(".maps");
+
+static int cb(struct bpf_map *map, __u64 *key, __u64 *val, void *arg)
+{
+ bpf_map_delete_elem(map, key);
+ bpf_map_update_elem(map, key, val, 0);
+ return 0;
+}
+
+SEC("tc")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ (void)skb;
+
+ bpf_for_each_map_elem(&hashmap, cb, NULL, 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 1b897152bab6..e01584c2189a 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -21,14 +21,15 @@ TEST_GEN_PROGS += test_zswap
LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h
include ../lib.mk
+include lib/libcgroup.mk
-$(OUTPUT)/test_core: cgroup_util.c
-$(OUTPUT)/test_cpu: cgroup_util.c
-$(OUTPUT)/test_cpuset: cgroup_util.c
-$(OUTPUT)/test_freezer: cgroup_util.c
-$(OUTPUT)/test_hugetlb_memcg: cgroup_util.c
-$(OUTPUT)/test_kill: cgroup_util.c
-$(OUTPUT)/test_kmem: cgroup_util.c
-$(OUTPUT)/test_memcontrol: cgroup_util.c
-$(OUTPUT)/test_pids: cgroup_util.c
-$(OUTPUT)/test_zswap: cgroup_util.c
+$(OUTPUT)/test_core: $(LIBCGROUP_O)
+$(OUTPUT)/test_cpu: $(LIBCGROUP_O)
+$(OUTPUT)/test_cpuset: $(LIBCGROUP_O)
+$(OUTPUT)/test_freezer: $(LIBCGROUP_O)
+$(OUTPUT)/test_hugetlb_memcg: $(LIBCGROUP_O)
+$(OUTPUT)/test_kill: $(LIBCGROUP_O)
+$(OUTPUT)/test_kmem: $(LIBCGROUP_O)
+$(OUTPUT)/test_memcontrol: $(LIBCGROUP_O)
+$(OUTPUT)/test_pids: $(LIBCGROUP_O)
+$(OUTPUT)/test_zswap: $(LIBCGROUP_O)
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/lib/cgroup_util.c
index 1e2d46636a0c..8832f3d1cb61 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/lib/cgroup_util.c
@@ -17,10 +17,10 @@
#include <unistd.h>
#include "cgroup_util.h"
-#include "../clone3/clone3_selftests.h"
+#include "../../clone3/clone3_selftests.h"
/* Returns read len on success, or -errno on failure. */
-static ssize_t read_text(const char *path, char *buf, size_t max_len)
+ssize_t read_text(const char *path, char *buf, size_t max_len)
{
ssize_t len;
int fd;
@@ -39,7 +39,7 @@ static ssize_t read_text(const char *path, char *buf, size_t max_len)
}
/* Returns written len on success, or -errno on failure. */
-static ssize_t write_text(const char *path, char *buf, ssize_t len)
+ssize_t write_text(const char *path, char *buf, ssize_t len)
{
int fd;
@@ -217,7 +217,8 @@ int cg_write_numeric(const char *cgroup, const char *control, long value)
return cg_write(cgroup, control, buf);
}
-int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
+static int cg_find_root(char *root, size_t len, const char *controller,
+ bool *nsdelegate)
{
char buf[10 * PAGE_SIZE];
char *fs, *mount, *type, *options;
@@ -236,18 +237,37 @@ int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
options = strtok(NULL, delim);
strtok(NULL, delim);
strtok(NULL, delim);
-
- if (strcmp(type, "cgroup2") == 0) {
- strncpy(root, mount, len);
- if (nsdelegate)
- *nsdelegate = !!strstr(options, "nsdelegate");
- return 0;
+ if (strcmp(type, "cgroup") == 0) {
+ if (!controller || !strstr(options, controller))
+ continue;
+ } else if (strcmp(type, "cgroup2") == 0) {
+ if (controller &&
+ cg_read_strstr(mount, "cgroup.controllers", controller))
+ continue;
+ } else {
+ continue;
}
+ strncpy(root, mount, len);
+
+ if (nsdelegate)
+ *nsdelegate = !!strstr(options, "nsdelegate");
+ return 0;
+
}
return -1;
}
+int cg_find_controller_root(char *root, size_t len, const char *controller)
+{
+ return cg_find_root(root, len, controller, NULL);
+}
+
+int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
+{
+ return cg_find_root(root, len, NULL, nsdelegate);
+}
+
int cg_create(const char *cgroup)
{
return mkdir(cgroup, 0755);
@@ -488,84 +508,6 @@ int cg_run_nowait(const char *cgroup,
return pid;
}
-int get_temp_fd(void)
-{
- return open(".", O_TMPFILE | O_RDWR | O_EXCL);
-}
-
-int alloc_pagecache(int fd, size_t size)
-{
- char buf[PAGE_SIZE];
- struct stat st;
- int i;
-
- if (fstat(fd, &st))
- goto cleanup;
-
- size += st.st_size;
-
- if (ftruncate(fd, size))
- goto cleanup;
-
- for (i = 0; i < size; i += sizeof(buf))
- read(fd, buf, sizeof(buf));
-
- return 0;
-
-cleanup:
- return -1;
-}
-
-int alloc_anon(const char *cgroup, void *arg)
-{
- size_t size = (unsigned long)arg;
- char *buf, *ptr;
-
- buf = malloc(size);
- for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
- *ptr = 0;
-
- free(buf);
- return 0;
-}
-
-int is_swap_enabled(void)
-{
- char buf[PAGE_SIZE];
- const char delim[] = "\n";
- int cnt = 0;
- char *line;
-
- if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
- return -1;
-
- for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
- cnt++;
-
- return cnt > 1;
-}
-
-int set_oom_adj_score(int pid, int score)
-{
- char path[PATH_MAX];
- int fd, len;
-
- sprintf(path, "/proc/%d/oom_score_adj", pid);
-
- fd = open(path, O_WRONLY | O_APPEND);
- if (fd < 0)
- return fd;
-
- len = dprintf(fd, "%d", score);
- if (len < 0) {
- close(fd);
- return len;
- }
-
- close(fd);
- return 0;
-}
-
int proc_mount_contains(const char *option)
{
char buf[4 * PAGE_SIZE];
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
index 19b131ee7707..adb2bc193183 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/lib/include/cgroup_util.h
@@ -2,9 +2,9 @@
#include <stdbool.h>
#include <stdlib.h>
-#include "../kselftest.h"
-
+#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
+#endif
#define MB(x) (x << 20)
@@ -21,6 +21,10 @@ static inline int values_close(long a, long b, int err)
return labs(a - b) <= (a + b) / 100 * err;
}
+extern ssize_t read_text(const char *path, char *buf, size_t max_len);
+extern ssize_t write_text(const char *path, char *buf, ssize_t len);
+
+extern int cg_find_controller_root(char *root, size_t len, const char *controller);
extern int cg_find_unified_root(char *root, size_t len, bool *nsdelegate);
extern char *cg_name(const char *root, const char *name);
extern char *cg_name_indexed(const char *root, const char *name, int index);
@@ -49,11 +53,6 @@ extern int cg_enter_current_thread(const char *cgroup);
extern int cg_run_nowait(const char *cgroup,
int (*fn)(const char *cgroup, void *arg),
void *arg);
-extern int get_temp_fd(void);
-extern int alloc_pagecache(int fd, size_t size);
-extern int alloc_anon(const char *cgroup, void *arg);
-extern int is_swap_enabled(void);
-extern int set_oom_adj_score(int pid, int score);
extern int cg_wait_for_proc_count(const char *cgroup, int count);
extern int cg_killall(const char *cgroup);
int proc_mount_contains(const char *option);
diff --git a/tools/testing/selftests/cgroup/lib/libcgroup.mk b/tools/testing/selftests/cgroup/lib/libcgroup.mk
new file mode 100644
index 000000000000..7a73007204c3
--- /dev/null
+++ b/tools/testing/selftests/cgroup/lib/libcgroup.mk
@@ -0,0 +1,19 @@
+CGROUP_DIR := $(selfdir)/cgroup
+
+LIBCGROUP_C := lib/cgroup_util.c
+
+LIBCGROUP_O := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBCGROUP_C))
+
+LIBCGROUP_O_DIRS := $(shell dirname $(LIBCGROUP_O) | uniq)
+
+CFLAGS += -I$(CGROUP_DIR)/lib/include
+
+EXTRA_HDRS := $(selfdir)/clone3/clone3_selftests.h
+
+$(LIBCGROUP_O_DIRS):
+ mkdir -p $@
+
+$(LIBCGROUP_O): $(OUTPUT)/%.o : $(CGROUP_DIR)/%.c $(EXTRA_HDRS) $(LIBCGROUP_O_DIRS)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+EXTRA_CLEAN += $(LIBCGROUP_O)
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 16f5d74ae762..2908f4e0629d 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -24,6 +24,84 @@
static bool has_localevents;
static bool has_recursiveprot;
+int get_temp_fd(void)
+{
+ return open(".", O_TMPFILE | O_RDWR | O_EXCL);
+}
+
+int alloc_pagecache(int fd, size_t size)
+{
+ char buf[PAGE_SIZE];
+ struct stat st;
+ int i;
+
+ if (fstat(fd, &st))
+ goto cleanup;
+
+ size += st.st_size;
+
+ if (ftruncate(fd, size))
+ goto cleanup;
+
+ for (i = 0; i < size; i += sizeof(buf))
+ read(fd, buf, sizeof(buf));
+
+ return 0;
+
+cleanup:
+ return -1;
+}
+
+int alloc_anon(const char *cgroup, void *arg)
+{
+ size_t size = (unsigned long)arg;
+ char *buf, *ptr;
+
+ buf = malloc(size);
+ for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ *ptr = 0;
+
+ free(buf);
+ return 0;
+}
+
+int is_swap_enabled(void)
+{
+ char buf[PAGE_SIZE];
+ const char delim[] = "\n";
+ int cnt = 0;
+ char *line;
+
+ if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
+ return -1;
+
+ for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
+ cnt++;
+
+ return cnt > 1;
+}
+
+int set_oom_adj_score(int pid, int score)
+{
+ char path[PATH_MAX];
+ int fd, len;
+
+ sprintf(path, "/proc/%d/oom_score_adj", pid);
+
+ fd = open(path, O_WRONLY | O_APPEND);
+ if (fd < 0)
+ return fd;
+
+ len = dprintf(fd, "%d", score);
+ if (len < 0) {
+ close(fd);
+ return len;
+ }
+
+ close(fd);
+ return 0;
+}
+
/*
* This test creates two nested cgroups with and without enabling
* the memory controller.
diff --git a/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
index 9f271ab6ec04..6a0378e06cab 100755
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -35,6 +35,7 @@ def test_zcrx(cfg) -> None:
rx_ring = _get_rx_ring_entries(cfg)
try:
+ ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
@@ -48,6 +49,7 @@ def test_zcrx(cfg) -> None:
ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
ethtool(f"-X {cfg.ifname} default", host=cfg.remote)
ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
def test_zcrx_oneshot(cfg) -> None:
@@ -59,6 +61,7 @@ def test_zcrx_oneshot(cfg) -> None:
rx_ring = _get_rx_ring_entries(cfg)
try:
+ ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
@@ -72,6 +75,7 @@ def test_zcrx_oneshot(cfg) -> None:
ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
ethtool(f"-X {cfg.ifname} default", host=cfg.remote)
ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
def main() -> None:
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 2bf14ac2b8c6..9d48004ff1a1 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -431,6 +431,22 @@ static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
return 0;
}
+static struct netdev_queue_id *create_queues(void)
+{
+ struct netdev_queue_id *queues;
+ size_t i = 0;
+
+ queues = calloc(num_queues, sizeof(*queues));
+ for (i = 0; i < num_queues; i++) {
+ queues[i]._present.type = 1;
+ queues[i]._present.id = 1;
+ queues[i].type = NETDEV_QUEUE_TYPE_RX;
+ queues[i].id = start_queue + i;
+ }
+
+ return queues;
+}
+
int do_server(struct memory_buffer *mem)
{
char ctrl_data[sizeof(int) * 20000];
@@ -448,7 +464,6 @@ int do_server(struct memory_buffer *mem)
char buffer[256];
int socket_fd;
int client_fd;
- size_t i = 0;
int ret;
ret = parse_address(server_ip, atoi(port), &server_sin);
@@ -471,16 +486,7 @@ int do_server(struct memory_buffer *mem)
sleep(1);
- queues = malloc(sizeof(*queues) * num_queues);
-
- for (i = 0; i < num_queues; i++) {
- queues[i]._present.type = 1;
- queues[i]._present.id = 1;
- queues[i].type = NETDEV_QUEUE_TYPE_RX;
- queues[i].id = start_queue + i;
- }
-
- if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
+ if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
error(1, 0, "Failed to bind\n");
tmp_mem = malloc(mem->size);
@@ -545,7 +551,6 @@ int do_server(struct memory_buffer *mem)
goto cleanup;
}
- i++;
for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
if (cm->cmsg_level != SOL_SOCKET ||
(cm->cmsg_type != SCM_DEVMEM_DMABUF &&
@@ -630,10 +635,8 @@ cleanup:
void run_devmem_tests(void)
{
- struct netdev_queue_id *queues;
struct memory_buffer *mem;
struct ynl_sock *ys;
- size_t i = 0;
mem = provider->alloc(getpagesize() * NUM_PAGES);
@@ -641,38 +644,24 @@ void run_devmem_tests(void)
if (configure_rss())
error(1, 0, "rss error\n");
- queues = calloc(num_queues, sizeof(*queues));
-
if (configure_headersplit(1))
error(1, 0, "Failed to configure header split\n");
- if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
+ if (!bind_rx_queue(ifindex, mem->fd,
+ calloc(num_queues, sizeof(struct netdev_queue_id)),
+ num_queues, &ys))
error(1, 0, "Binding empty queues array should have failed\n");
- for (i = 0; i < num_queues; i++) {
- queues[i]._present.type = 1;
- queues[i]._present.id = 1;
- queues[i].type = NETDEV_QUEUE_TYPE_RX;
- queues[i].id = start_queue + i;
- }
-
if (configure_headersplit(0))
error(1, 0, "Failed to configure header split\n");
- if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
+ if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
error(1, 0, "Configure dmabuf with header split off should have failed\n");
if (configure_headersplit(1))
error(1, 0, "Failed to configure header split\n");
- for (i = 0; i < num_queues; i++) {
- queues[i]._present.type = 1;
- queues[i]._present.id = 1;
- queues[i].type = NETDEV_QUEUE_TYPE_RX;
- queues[i].id = start_queue + i;
- }
-
- if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys))
+ if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys))
error(1, 0, "Failed to bind\n");
/* Deactivating a bound queue should not be legal */
diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
index bed748dde4b0..8972f42dfe03 100755
--- a/tools/testing/selftests/drivers/net/ocelot/psfp.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
@@ -266,18 +266,14 @@ run_test()
"${base_time}" \
"${CYCLE_TIME_NS}" \
"${SHIFT_TIME_NS}" \
+ "${GATE_DURATION_NS}" \
"${NUM_PKTS}" \
"${STREAM_VID}" \
"${STREAM_PRIO}" \
"" \
"${isochron_dat}"
- # Count all received packets by looking at the non-zero RX timestamps
- received=$(isochron report \
- --input-file "${isochron_dat}" \
- --printf-format "%u\n" --printf-args "R" | \
- grep -w -v '0' | wc -l)
-
+ received=$(isochron_report_num_received "${isochron_dat}")
if [ "${received}" = "${expected}" ]; then
RET=0
else
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
index 4b6822866066..af8df2313a3b 100755
--- a/tools/testing/selftests/drivers/net/ping.py
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -9,11 +9,11 @@ from lib.py import EthtoolFamily, NetDrvEpEnv
from lib.py import bkg, cmd, wait_port_listen, rand_port
from lib.py import defer, ethtool, ip
-remote_ifname=""
no_sleep=False
def _test_v4(cfg) -> None:
- cfg.require_ipver("4")
+ if not cfg.addr_v["4"]:
+ return
cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"])
cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote)
@@ -21,7 +21,8 @@ def _test_v4(cfg) -> None:
cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote)
def _test_v6(cfg) -> None:
- cfg.require_ipver("6")
+ if not cfg.addr_v["6"]:
+ return
cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"])
cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote)
@@ -57,7 +58,7 @@ def _set_offload_checksum(cfg, netnl, on) -> None:
def _set_xdp_generic_sb_on(cfg) -> None:
prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
- cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True)
defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off")
@@ -66,8 +67,8 @@ def _set_xdp_generic_sb_on(cfg) -> None:
def _set_xdp_generic_mb_on(cfg) -> None:
prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
- cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote)
- defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote)
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog))
defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off")
@@ -76,7 +77,7 @@ def _set_xdp_generic_mb_on(cfg) -> None:
def _set_xdp_native_sb_on(cfg) -> None:
prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
- cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True)
defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
@@ -93,8 +94,8 @@ def _set_xdp_native_sb_on(cfg) -> None:
def _set_xdp_native_mb_on(cfg) -> None:
prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
- cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote)
- defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote)
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
try:
cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True)
defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
@@ -112,18 +113,15 @@ def _set_xdp_offload_on(cfg) -> None:
except Exception as e:
raise KsftSkipEx('device does not support offloaded XDP')
defer(ip, f"link set dev {cfg.ifname} xdpoffload off")
- cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
if no_sleep != True:
time.sleep(10)
def get_interface_info(cfg) -> None:
- global remote_ifname
global no_sleep
- remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_addr_v['4']} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout
- remote_ifname = remote_info.rstrip('\n')
- if remote_ifname == "":
+ if cfg.remote_ifname == "":
raise KsftFailEx('Can not get remote interface')
local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim":
@@ -136,15 +134,25 @@ def set_interface_init(cfg) -> None:
cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True)
cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True)
cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True)
- cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+def test_default_v4(cfg, netnl) -> None:
+ cfg.require_ipver("4")
-def test_default(cfg, netnl) -> None:
_set_offload_checksum(cfg, netnl, "off")
_test_v4(cfg)
- _test_v6(cfg)
_test_tcp(cfg)
_set_offload_checksum(cfg, netnl, "on")
_test_v4(cfg)
+ _test_tcp(cfg)
+
+def test_default_v6(cfg, netnl) -> None:
+ cfg.require_ipver("6")
+
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
_test_v6(cfg)
_test_tcp(cfg)
@@ -202,7 +210,8 @@ def main() -> None:
with NetDrvEpEnv(__file__) as cfg:
get_interface_info(cfg)
set_interface_init(cfg)
- ksft_run([test_default,
+ ksft_run([test_default_v4,
+ test_default_v6,
test_xdp_generic_sb,
test_xdp_generic_mb,
test_xdp_native_sb,
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
index 4a2d5c454fd1..59a71f22fb11 100644
--- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
@@ -48,8 +48,16 @@ static uint64_t get_mnt_id(struct __test_metadata *const _metadata,
static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
+static const int mark_cmds[] = {
+ FAN_MARK_ADD,
+ FAN_MARK_REMOVE,
+ FAN_MARK_FLUSH
+};
+
+#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds)
+
FIXTURE(fanotify) {
- int fan_fd;
+ int fan_fd[NUM_FAN_FDS];
char buf[256];
unsigned int rem;
void *next;
@@ -61,7 +69,7 @@ FIXTURE(fanotify) {
FIXTURE_SETUP(fanotify)
{
- int ret;
+ int i, ret;
ASSERT_EQ(unshare(CLONE_NEWNS), 0);
@@ -89,20 +97,34 @@ FIXTURE_SETUP(fanotify)
self->root_id = get_mnt_id(_metadata, "/");
ASSERT_NE(self->root_id, 0);
- self->fan_fd = fanotify_init(FAN_REPORT_MNT, 0);
- ASSERT_GE(self->fan_fd, 0);
-
- ret = fanotify_mark(self->fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS,
- FAN_MNT_ATTACH | FAN_MNT_DETACH, self->ns_fd, NULL);
- ASSERT_EQ(ret, 0);
+ for (i = 0; i < NUM_FAN_FDS; i++) {
+ self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK,
+ 0);
+ ASSERT_GE(self->fan_fd[i], 0);
+ ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ // On fd[0] we do an extra ADD that changes nothing.
+ // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark.
+ ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ }
self->rem = 0;
}
FIXTURE_TEARDOWN(fanotify)
{
+ int i;
+
ASSERT_EQ(self->rem, 0);
- close(self->fan_fd);
+ for (i = 0; i < NUM_FAN_FDS; i++)
+ close(self->fan_fd[i]);
ASSERT_EQ(fchdir(self->orig_root), 0);
@@ -123,8 +145,21 @@ static uint64_t expect_notify(struct __test_metadata *const _metadata,
unsigned int thislen;
if (!self->rem) {
- ssize_t len = read(self->fan_fd, self->buf, sizeof(self->buf));
- ASSERT_GT(len, 0);
+ ssize_t len;
+ int i;
+
+ for (i = NUM_FAN_FDS - 1; i >= 0; i--) {
+ len = read(self->fan_fd[i], self->buf,
+ sizeof(self->buf));
+ if (i > 0) {
+ // Groups 1,2 should get EAGAIN
+ ASSERT_EQ(len, -1);
+ ASSERT_EQ(errno, EAGAIN);
+ } else {
+ // Group 0 should get events
+ ASSERT_GT(len, 0);
+ }
+ }
self->rem = len;
self->next = (void *) self->buf;
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc
index 6b94b678741a..f656bccb1a14 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc
@@ -7,11 +7,32 @@
MAX_ARGS=128
EXCEED_ARGS=$((MAX_ARGS + 1))
+# bash and dash evaluate variables differently.
+# dash will evaluate '\\' every time it is read whereas bash does not.
+#
+# TEST_STRING="$TEST_STRING \\$i"
+# echo $TEST_STRING
+#
+# With i=123
+# On bash, that will print "\123"
+# but on dash, that will print the escape sequence of \123 as the \ will
+# be interpreted again in the echo.
+#
+# Set a variable "bs" to save a double backslash, then echo that
+# to "ts" to see if $ts changed or not. If it changed, it's dash,
+# if not, it's bash, and then bs can equal a single backslash.
+bs='\\'
+ts=`echo $bs`
+if [ "$ts" = '\\' ]; then
+ # this is bash
+ bs='\'
+fi
+
check_max_args() { # event_header
TEST_STRING=$1
# Acceptable
for i in `seq 1 $MAX_ARGS`; do
- TEST_STRING="$TEST_STRING \\$i"
+ TEST_STRING="$TEST_STRING $bs$i"
done
echo "$TEST_STRING" >> dynamic_events
echo > dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
index 118247b8dd84..c62165fabd0c 100644
--- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
+++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
@@ -80,6 +80,26 @@ if [ $misscnt -gt 0 ]; then
exit_fail
fi
+# Check strings too
+if [ -f events/syscalls/sys_enter_openat/filter ]; then
+ DIRNAME=`basename $TMPDIR`
+ echo "filename.ustring ~ \"*$DIRNAME*\"" > events/syscalls/sys_enter_openat/filter
+ echo 1 > events/syscalls/sys_enter_openat/enable
+ echo 1 > tracing_on
+ ls /bin/sh
+ nocnt=`grep openat trace | wc -l`
+ ls $TMPDIR
+ echo 0 > tracing_on
+ hitcnt=`grep openat trace | wc -l`;
+ echo 0 > events/syscalls/sys_enter_openat/enable
+ if [ $nocnt -gt 0 ]; then
+ exit_fail
+ fi
+ if [ $hitcnt -eq 0 ]; then
+ exit_fail
+ fi
+fi
+
reset_events_filter
exit 0
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 20af35a91d6f..d9fffe06d3ea 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -3,7 +3,7 @@ top_srcdir = ../../../..
include $(top_srcdir)/scripts/subarch.include
ARCH ?= $(SUBARCH)
-ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64))
+ifeq ($(ARCH),$(filter $(ARCH),arm64 s390 riscv x86 x86_64 loongarch))
# Top-level selftests allows ARCH=x86_64 :-(
ifeq ($(ARCH),x86_64)
ARCH := x86
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index f62b0a5aba35..38b95998e1e6 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -8,6 +8,7 @@ LIBKVM += lib/elf.c
LIBKVM += lib/guest_modes.c
LIBKVM += lib/io.c
LIBKVM += lib/kvm_util.c
+LIBKVM += lib/lru_gen_util.c
LIBKVM += lib/memstress.c
LIBKVM += lib/guest_sprintf.c
LIBKVM += lib/rbtree.c
@@ -47,6 +48,10 @@ LIBKVM_riscv += lib/riscv/handlers.S
LIBKVM_riscv += lib/riscv/processor.c
LIBKVM_riscv += lib/riscv/ucall.c
+LIBKVM_loongarch += lib/loongarch/processor.c
+LIBKVM_loongarch += lib/loongarch/ucall.c
+LIBKVM_loongarch += lib/loongarch/exception.S
+
# Non-compiled test targets
TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
@@ -66,6 +71,7 @@ TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
TEST_GEN_PROGS_x86 += x86/feature_msrs_test
TEST_GEN_PROGS_x86 += x86/exit_on_emulation_failure_test
+TEST_GEN_PROGS_x86 += x86/fastops_test
TEST_GEN_PROGS_x86 += x86/fix_hypercall_test
TEST_GEN_PROGS_x86 += x86/hwcr_msr_test
TEST_GEN_PROGS_x86 += x86/hyperv_clock
@@ -78,6 +84,7 @@ TEST_GEN_PROGS_x86 += x86/hyperv_svm_test
TEST_GEN_PROGS_x86 += x86/hyperv_tlb_flush
TEST_GEN_PROGS_x86 += x86/kvm_clock_test
TEST_GEN_PROGS_x86 += x86/kvm_pv_test
+TEST_GEN_PROGS_x86 += x86/kvm_buslock_test
TEST_GEN_PROGS_x86 += x86/monitor_mwait_test
TEST_GEN_PROGS_x86 += x86/nested_emulation_test
TEST_GEN_PROGS_x86 += x86/nested_exceptions_test
@@ -147,6 +154,7 @@ TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
+TEST_GEN_PROGS_arm64 += arm64/host_sve
TEST_GEN_PROGS_arm64 += arm64/hypercalls
TEST_GEN_PROGS_arm64 += arm64/mmio_abort
TEST_GEN_PROGS_arm64 += arm64/page_fault_test
@@ -190,6 +198,19 @@ TEST_GEN_PROGS_riscv += coalesced_io_test
TEST_GEN_PROGS_riscv += get-reg-list
TEST_GEN_PROGS_riscv += steal_time
+TEST_GEN_PROGS_loongarch += coalesced_io_test
+TEST_GEN_PROGS_loongarch += demand_paging_test
+TEST_GEN_PROGS_loongarch += dirty_log_perf_test
+TEST_GEN_PROGS_loongarch += dirty_log_test
+TEST_GEN_PROGS_loongarch += guest_print_test
+TEST_GEN_PROGS_loongarch += hardware_disable_test
+TEST_GEN_PROGS_loongarch += kvm_binary_stats_test
+TEST_GEN_PROGS_loongarch += kvm_create_max_vcpus
+TEST_GEN_PROGS_loongarch += kvm_page_table_test
+TEST_GEN_PROGS_loongarch += memslot_modification_stress_test
+TEST_GEN_PROGS_loongarch += memslot_perf_test
+TEST_GEN_PROGS_loongarch += set_memory_region_test
+
SPLIT_TESTS += arch_timer
SPLIT_TESTS += get-reg-list
@@ -204,6 +225,7 @@ OVERRIDE_TARGETS = 1
# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`,
# which causes the environment variable to override the makefile).
include ../lib.mk
+include ../cgroup/lib/libcgroup.mk
INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
@@ -257,7 +279,7 @@ LIBKVM_S := $(filter %.S,$(LIBKVM))
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
-LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) $(LIBCGROUP_O)
SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH)/%.o, $(SPLIT_TESTS))
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 447e619cf856..da7196fd1b23 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -7,9 +7,11 @@
* This test measures the performance effects of KVM's access tracking.
* Access tracking is driven by the MMU notifiers test_young, clear_young, and
* clear_flush_young. These notifiers do not have a direct userspace API,
- * however the clear_young notifier can be triggered by marking a pages as idle
- * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to
- * enable access tracking on guest memory.
+ * however the clear_young notifier can be triggered either by
+ * 1. marking a pages as idle in /sys/kernel/mm/page_idle/bitmap OR
+ * 2. adding a new MGLRU generation using the lru_gen debugfs file.
+ * This test leverages page_idle to enable access tracking on guest memory
+ * unless MGLRU is enabled, in which case MGLRU is used.
*
* To measure performance this test runs a VM with a configurable number of
* vCPUs that each touch every page in disjoint regions of memory. Performance
@@ -17,10 +19,11 @@
* predefined region.
*
* Note that a deterministic correctness test of access tracking is not possible
- * by using page_idle as it exists today. This is for a few reasons:
+ * by using page_idle or MGLRU aging as it exists today. This is for a few
+ * reasons:
*
- * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This
- * means subsequent guest accesses are not guaranteed to see page table
+ * 1. page_idle and MGLRU only issue clear_young notifiers, which lack a TLB flush.
+ * This means subsequent guest accesses are not guaranteed to see page table
* updates made by KVM until some time in the future.
*
* 2. page_idle only operates on LRU pages. Newly allocated pages are not
@@ -48,9 +51,17 @@
#include "guest_modes.h"
#include "processor.h"
+#include "cgroup_util.h"
+#include "lru_gen_util.h"
+
+static const char *TEST_MEMCG_NAME = "access_tracking_perf_test";
+
/* Global variable used to synchronize all of the vCPU threads. */
static int iteration;
+/* The cgroup memory controller root. Needed for lru_gen-based aging. */
+char cgroup_root[PATH_MAX];
+
/* Defines what vCPU threads should do during a given iteration. */
static enum {
/* Run the vCPU to access all its memory. */
@@ -65,6 +76,25 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
/* Whether to overlap the regions of memory vCPUs access. */
static bool overlap_memory_access;
+/*
+ * If the test should only warn if there are too many idle pages (i.e., it is
+ * expected).
+ * -1: Not yet set.
+ * 0: We do not expect too many idle pages, so FAIL if too many idle pages.
+ * 1: Having too many idle pages is expected, so merely print a warning if
+ * too many idle pages are found.
+ */
+static int idle_pages_warn_only = -1;
+
+/* Whether or not to use MGLRU instead of page_idle for access tracking */
+static bool use_lru_gen;
+
+/* Total number of pages to expect in the memcg after touching everything */
+static long test_pages;
+
+/* Last generation we found the pages in */
+static int lru_gen_last_gen = -1;
+
struct test_params {
/* The backing source for the region of memory. */
enum vm_mem_backing_src_type backing_src;
@@ -123,8 +153,24 @@ static void mark_page_idle(int page_idle_fd, uint64_t pfn)
"Set page_idle bits for PFN 0x%" PRIx64, pfn);
}
-static void mark_vcpu_memory_idle(struct kvm_vm *vm,
- struct memstress_vcpu_args *vcpu_args)
+static void too_many_idle_pages(long idle_pages, long total_pages, int vcpu_idx)
+{
+ char prefix[18] = {};
+
+ if (vcpu_idx >= 0)
+ snprintf(prefix, 18, "vCPU%d: ", vcpu_idx);
+
+ TEST_ASSERT(idle_pages_warn_only,
+ "%sToo many pages still idle (%lu out of %lu)",
+ prefix, idle_pages, total_pages);
+
+ printf("WARNING: %sToo many pages still idle (%lu out of %lu), "
+ "this will affect performance results.\n",
+ prefix, idle_pages, total_pages);
+}
+
+static void pageidle_mark_vcpu_memory_idle(struct kvm_vm *vm,
+ struct memstress_vcpu_args *vcpu_args)
{
int vcpu_idx = vcpu_args->vcpu_idx;
uint64_t base_gva = vcpu_args->gva;
@@ -177,27 +223,79 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm,
* arbitrary; high enough that we ensure most memory access went through
* access tracking but low enough as to not make the test too brittle
* over time and across architectures.
- *
- * When running the guest as a nested VM, "warn" instead of asserting
- * as the TLB size is effectively unlimited and the KVM doesn't
- * explicitly flush the TLB when aging SPTEs. As a result, more pages
- * are cached and the guest won't see the "idle" bit cleared.
*/
- if (still_idle >= pages / 10) {
-#ifdef __x86_64__
- TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR),
- "vCPU%d: Too many pages still idle (%lu out of %lu)",
- vcpu_idx, still_idle, pages);
-#endif
- printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), "
- "this will affect performance results.\n",
- vcpu_idx, still_idle, pages);
- }
+ if (still_idle >= pages / 10)
+ too_many_idle_pages(still_idle, pages,
+ overlap_memory_access ? -1 : vcpu_idx);
close(page_idle_fd);
close(pagemap_fd);
}
+int find_generation(struct memcg_stats *stats, long total_pages)
+{
+ /*
+ * For finding the generation that contains our pages, use the same
+ * 90% threshold that page_idle uses.
+ */
+ int gen = lru_gen_find_generation(stats, total_pages * 9 / 10);
+
+ if (gen >= 0)
+ return gen;
+
+ if (!idle_pages_warn_only) {
+ TEST_FAIL("Could not find a generation with 90%% of guest memory (%ld pages).",
+ total_pages * 9 / 10);
+ return gen;
+ }
+
+ /*
+ * We couldn't find a generation with 90% of guest memory, which can
+ * happen if access tracking is unreliable. Simply look for a majority
+ * of pages.
+ */
+ puts("WARNING: Couldn't find a generation with 90% of guest memory. "
+ "Performance results may not be accurate.");
+ gen = lru_gen_find_generation(stats, total_pages / 2);
+ TEST_ASSERT(gen >= 0,
+ "Could not find a generation with 50%% of guest memory (%ld pages).",
+ total_pages / 2);
+ return gen;
+}
+
+static void lru_gen_mark_memory_idle(struct kvm_vm *vm)
+{
+ struct timespec ts_start;
+ struct timespec ts_elapsed;
+ struct memcg_stats stats;
+ int new_gen;
+
+ /* Make a new generation */
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+ lru_gen_do_aging(&stats, TEST_MEMCG_NAME);
+ ts_elapsed = timespec_elapsed(ts_start);
+
+ /* Check the generation again */
+ new_gen = find_generation(&stats, test_pages);
+
+ /*
+ * This function should only be invoked with newly-accessed pages,
+ * so pages should always move to a newer generation.
+ */
+ if (new_gen <= lru_gen_last_gen) {
+ /* We did not move to a newer generation. */
+ long idle_pages = lru_gen_sum_memcg_stats_for_gen(lru_gen_last_gen,
+ &stats);
+
+ too_many_idle_pages(min_t(long, idle_pages, test_pages),
+ test_pages, -1);
+ }
+ pr_info("%-30s: %ld.%09lds\n",
+ "Mark memory idle (lru_gen)", ts_elapsed.tv_sec,
+ ts_elapsed.tv_nsec);
+ lru_gen_last_gen = new_gen;
+}
+
static void assert_ucall(struct kvm_vcpu *vcpu, uint64_t expected_ucall)
{
struct ucall uc;
@@ -237,7 +335,7 @@ static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args)
assert_ucall(vcpu, UCALL_SYNC);
break;
case ITERATION_MARK_IDLE:
- mark_vcpu_memory_idle(vm, vcpu_args);
+ pageidle_mark_vcpu_memory_idle(vm, vcpu_args);
break;
}
@@ -289,15 +387,18 @@ static void access_memory(struct kvm_vm *vm, int nr_vcpus,
static void mark_memory_idle(struct kvm_vm *vm, int nr_vcpus)
{
+ if (use_lru_gen)
+ return lru_gen_mark_memory_idle(vm);
+
/*
* Even though this parallelizes the work across vCPUs, this is still a
* very slow operation because page_idle forces the test to mark one pfn
- * at a time and the clear_young notifier serializes on the KVM MMU
+ * at a time and the clear_young notifier may serialize on the KVM MMU
* lock.
*/
pr_debug("Marking VM memory idle (slow)...\n");
iteration_work = ITERATION_MARK_IDLE;
- run_iteration(vm, nr_vcpus, "Mark memory idle");
+ run_iteration(vm, nr_vcpus, "Mark memory idle (page_idle)");
}
static void run_test(enum vm_guest_mode mode, void *arg)
@@ -309,11 +410,38 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1,
params->backing_src, !overlap_memory_access);
+ /*
+ * If guest_page_size is larger than the host's page size, the
+ * guest (memstress) will only fault in a subset of the host's pages.
+ */
+ test_pages = params->nr_vcpus * params->vcpu_memory_bytes /
+ max(memstress_args.guest_page_size,
+ (uint64_t)getpagesize());
+
memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main);
pr_info("\n");
access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory");
+ if (use_lru_gen) {
+ struct memcg_stats stats;
+
+ /*
+ * Do a page table scan now. Following initial population, aging
+ * may not cause the pages to move to a newer generation. Do
+ * an aging pass now so that future aging passes always move
+ * pages to a newer generation.
+ */
+ printf("Initial aging pass (lru_gen)\n");
+ lru_gen_do_aging(&stats, TEST_MEMCG_NAME);
+ TEST_ASSERT(lru_gen_sum_memcg_stats(&stats) >= test_pages,
+ "Not all pages accounted for (looking for %ld). "
+ "Was the memcg set up correctly?", test_pages);
+ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Re-populating memory");
+ lru_gen_read_memcg_stats(&stats, TEST_MEMCG_NAME);
+ lru_gen_last_gen = find_generation(&stats, test_pages);
+ }
+
/* As a control, read and write to the populated memory first. */
access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to populated memory");
access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from populated memory");
@@ -328,6 +456,37 @@ static void run_test(enum vm_guest_mode mode, void *arg)
memstress_destroy_vm(vm);
}
+static int access_tracking_unreliable(void)
+{
+#ifdef __x86_64__
+ /*
+ * When running nested, the TLB size may be effectively unlimited (for
+ * example, this is the case when running on KVM L0), and KVM doesn't
+ * explicitly flush the TLB when aging SPTEs. As a result, more pages
+ * are cached and the guest won't see the "idle" bit cleared.
+ */
+ if (this_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ puts("Skipping idle page count sanity check, because the test is run nested");
+ return 1;
+ }
+#endif
+ /*
+ * When NUMA balancing is enabled, guest memory will be unmapped to get
+ * NUMA faults, dropping the Accessed bits.
+ */
+ if (is_numa_balancing_enabled()) {
+ puts("Skipping idle page count sanity check, because NUMA balancing is enabled");
+ return 1;
+ }
+ return 0;
+}
+
+static int run_test_for_each_guest_mode(const char *cgroup, void *arg)
+{
+ for_each_guest_mode(run_test, arg);
+ return 0;
+}
+
static void help(char *name)
{
puts("");
@@ -342,11 +501,22 @@ static void help(char *name)
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
+ printf(" -w: Control whether the test warns or fails if more than 10%%\n"
+ " of pages are still seen as idle/old after accessing guest\n"
+ " memory. >0 == warn only, 0 == fail, <0 == auto. For auto\n"
+ " mode, the test fails by default, but switches to warn only\n"
+ " if NUMA balancing is enabled or the test detects it's running\n"
+ " in a VM.\n");
backing_src_help("-s");
puts("");
exit(0);
}
+void destroy_cgroup(char *cg)
+{
+ printf("Destroying cgroup: %s\n", cg);
+}
+
int main(int argc, char *argv[])
{
struct test_params params = {
@@ -354,12 +524,13 @@ int main(int argc, char *argv[])
.vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
.nr_vcpus = 1,
};
+ char *new_cg = NULL;
int page_idle_fd;
int opt;
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
+ while ((opt = getopt(argc, argv, "hm:b:v:os:w:")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
@@ -376,6 +547,11 @@ int main(int argc, char *argv[])
case 's':
params.backing_src = parse_backing_src_type(optarg);
break;
+ case 'w':
+ idle_pages_warn_only =
+ atoi_non_negative("Idle pages warning",
+ optarg);
+ break;
case 'h':
default:
help(argv[0]);
@@ -383,12 +559,53 @@ int main(int argc, char *argv[])
}
}
- page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
- __TEST_REQUIRE(page_idle_fd >= 0,
- "CONFIG_IDLE_PAGE_TRACKING is not enabled");
- close(page_idle_fd);
+ if (idle_pages_warn_only == -1)
+ idle_pages_warn_only = access_tracking_unreliable();
+
+ if (lru_gen_usable()) {
+ bool cg_created = true;
+ int ret;
- for_each_guest_mode(run_test, &params);
+ puts("Using lru_gen for aging");
+ use_lru_gen = true;
+
+ if (cg_find_controller_root(cgroup_root, sizeof(cgroup_root), "memory"))
+ ksft_exit_skip("Cannot find memory cgroup controller\n");
+
+ new_cg = cg_name(cgroup_root, TEST_MEMCG_NAME);
+ printf("Creating cgroup: %s\n", new_cg);
+ if (cg_create(new_cg)) {
+ if (errno == EEXIST) {
+ printf("Found existing cgroup");
+ cg_created = false;
+ } else {
+ ksft_exit_skip("could not create new cgroup: %s\n", new_cg);
+ }
+ }
+
+ /*
+ * This will fork off a new process to run the test within
+ * a new memcg, so we need to properly propagate the return
+ * value up.
+ */
+ ret = cg_run(new_cg, &run_test_for_each_guest_mode, &params);
+ if (cg_created)
+ cg_destroy(new_cg);
+ if (ret < 0)
+ TEST_FAIL("child did not spawn or was abnormally killed");
+ if (ret)
+ return ret;
+ } else {
+ page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+ __TEST_REQUIRE(page_idle_fd >= 0,
+ "Couldn't open /sys/kernel/mm/page_idle/bitmap. "
+ "Is CONFIG_IDLE_PAGE_TRACKING enabled?");
+
+ close(page_idle_fd);
+
+ puts("Using page_idle for aging");
+ run_test_for_each_guest_mode(NULL, &params);
+ }
return 0;
}
diff --git a/tools/testing/selftests/kvm/arm64/host_sve.c b/tools/testing/selftests/kvm/arm64/host_sve.c
new file mode 100644
index 000000000000..3826772fd470
--- /dev/null
+++ b/tools/testing/selftests/kvm/arm64/host_sve.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Host SVE: Check FPSIMD/SVE/SME save/restore over KVM_RUN ioctls.
+ *
+ * Copyright 2025 Arm, Ltd
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <sys/auxv.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "ucall_common.h"
+
+static void guest_code(void)
+{
+ for (int i = 0; i < 10; i++) {
+ GUEST_UCALL_NONE();
+ }
+
+ GUEST_DONE();
+}
+
+void handle_sigill(int sig, siginfo_t *info, void *ctx)
+{
+ ucontext_t *uctx = ctx;
+
+ printf(" < host signal %d >\n", sig);
+
+ /*
+ * Skip the UDF
+ */
+ uctx->uc_mcontext.pc += 4;
+}
+
+void register_sigill_handler(void)
+{
+ struct sigaction sa = {
+ .sa_sigaction = handle_sigill,
+ .sa_flags = SA_SIGINFO,
+ };
+ sigaction(SIGILL, &sa, NULL);
+}
+
+static void do_sve_roundtrip(void)
+{
+ unsigned long before, after;
+
+ /*
+ * Set all bits in a predicate register, force a save/restore via a
+ * SIGILL (which handle_sigill() will recover from), then report
+ * whether the value has changed.
+ */
+ asm volatile(
+ " .arch_extension sve\n"
+ " ptrue p0.B\n"
+ " cntp %[before], p0, p0.B\n"
+ " udf #0\n"
+ " cntp %[after], p0, p0.B\n"
+ : [before] "=r" (before),
+ [after] "=r" (after)
+ :
+ : "p0"
+ );
+
+ if (before != after) {
+ TEST_FAIL("Signal roundtrip discarded predicate bits (%ld => %ld)\n",
+ before, after);
+ } else {
+ printf("Signal roundtrip preserved predicate bits (%ld => %ld)\n",
+ before, after);
+ }
+}
+
+static void test_run(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool guest_done = false;
+
+ register_sigill_handler();
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ do_sve_roundtrip();
+
+ while (!guest_done) {
+
+ printf("Running VCPU...\n");
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_NONE:
+ do_sve_roundtrip();
+ do_sve_roundtrip();
+ break;
+ case UCALL_DONE:
+ guest_done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ /*
+ * This is testing the host environment, we don't care about
+ * guest SVE support.
+ */
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
+ printf("SVE not supported\n");
+ return KSFT_SKIP;
+ }
+
+ test_run();
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 322b9d3b0125..8f422bfdfcb9 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -15,6 +15,8 @@
#include "test_util.h"
#include <linux/bitfield.h>
+bool have_cap_arm_mte;
+
enum ftr_type {
FTR_EXACT, /* Use a predefined safe value */
FTR_LOWER_SAFE, /* Smaller value is safe */
@@ -129,10 +131,10 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 1),
REG_FTR_END,
};
@@ -543,6 +545,70 @@ static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu)
ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n");
}
+#define MTE_IDREG_TEST 1
+static void test_user_set_mte_reg(struct kvm_vcpu *vcpu)
+{
+ uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+ struct reg_mask_range range = {
+ .addr = (__u64)masks,
+ };
+ uint64_t val;
+ uint64_t mte;
+ uint64_t mte_frac;
+ int idx, err;
+
+ if (!have_cap_arm_mte) {
+ ksft_test_result_skip("MTE capability not supported, nothing to test\n");
+ return;
+ }
+
+ /* Get writable masks for feature ID registers */
+ memset(range.reserved, 0, sizeof(range.reserved));
+ vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+ idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1);
+ if ((masks[idx] & ID_AA64PFR1_EL1_MTE_frac_MASK) == ID_AA64PFR1_EL1_MTE_frac_MASK) {
+ ksft_test_result_skip("ID_AA64PFR1_EL1.MTE_frac is officially writable, nothing to test\n");
+ return;
+ }
+
+ /*
+ * When MTE is supported but MTE_ASYMM is not (ID_AA64PFR1_EL1.MTE == 2)
+ * ID_AA64PFR1_EL1.MTE_frac == 0xF indicates MTE_ASYNC is unsupported
+ * and MTE_frac == 0 indicates it is supported.
+ *
+ * As MTE_frac was previously unconditionally read as 0, check
+ * that the set to 0 succeeds but does not change MTE_frac
+ * from unsupported (0xF) to supported (0).
+ *
+ */
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+
+ mte = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE), val);
+ mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val);
+ if (mte != ID_AA64PFR1_EL1_MTE_MTE2 ||
+ mte_frac != ID_AA64PFR1_EL1_MTE_frac_NI) {
+ ksft_test_result_skip("MTE_ASYNC or MTE_ASYMM are supported, nothing to test\n");
+ return;
+ }
+
+ /* Try to set MTE_frac=0. */
+ val &= ~ID_AA64PFR1_EL1_MTE_frac_MASK;
+ val |= FIELD_PREP(ID_AA64PFR1_EL1_MTE_frac_MASK, 0);
+ err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val);
+ if (err) {
+ ksft_test_result_fail("ID_AA64PFR1_EL1.MTE_frac=0 was not accepted\n");
+ return;
+ }
+
+ val = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1));
+ mte_frac = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_MTE_frac), val);
+ if (mte_frac == ID_AA64PFR1_EL1_MTE_frac_NI)
+ ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac=0 accepted and still 0xF\n");
+ else
+ ksft_test_result_pass("ID_AA64PFR1_EL1.MTE_frac no longer 0xF\n");
+}
+
static void test_guest_reg_read(struct kvm_vcpu *vcpu)
{
bool done = false;
@@ -673,6 +739,14 @@ static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
ksft_test_result_pass("%s\n", __func__);
}
+void kvm_arch_vm_post_create(struct kvm_vm *vm)
+{
+ if (vm_check_cap(vm, KVM_CAP_ARM_MTE)) {
+ vm_enable_cap(vm, KVM_CAP_ARM_MTE, 0);
+ have_cap_arm_mte = true;
+ }
+}
+
int main(void)
{
struct kvm_vcpu *vcpu;
@@ -701,7 +775,7 @@ int main(void)
ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) +
ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) +
ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 +
- MPAM_IDREG_TEST;
+ MPAM_IDREG_TEST + MTE_IDREG_TEST;
ksft_set_plan(test_cnt);
@@ -709,6 +783,7 @@ int main(void)
test_vcpu_ftr_id_regs(vcpu);
test_vcpu_non_ftr_id_regs(vcpu);
test_user_set_mpam_reg(vcpu);
+ test_user_set_mte_reg(vcpu);
test_guest_reg_read(vcpu);
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 373912464fb4..bee65ca08721 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -177,6 +177,7 @@ enum vm_guest_mode {
VM_MODE_P36V48_4K,
VM_MODE_P36V48_16K,
VM_MODE_P36V48_64K,
+ VM_MODE_P47V47_16K,
VM_MODE_P36V47_16K,
NUM_VM_MODES,
};
@@ -232,6 +233,11 @@ extern enum vm_guest_mode vm_mode_default;
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 8)
+#elif defined(__loongarch__)
+#define VM_MODE_DEFAULT VM_MODE_P47V47_16K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
#endif
#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT)
@@ -549,6 +555,41 @@ void kvm_get_stat(struct kvm_binary_stats *stats, const char *name,
#define vm_get_stat(vm, stat) __get_stat(&(vm)->stats, stat)
#define vcpu_get_stat(vcpu, stat) __get_stat(&(vcpu)->stats, stat)
+static inline bool read_smt_control(char *buf, size_t buf_size)
+{
+ FILE *f = fopen("/sys/devices/system/cpu/smt/control", "r");
+ bool ret;
+
+ if (!f)
+ return false;
+
+ ret = fread(buf, sizeof(*buf), buf_size, f) > 0;
+ fclose(f);
+
+ return ret;
+}
+
+static inline bool is_smt_possible(void)
+{
+ char buf[16];
+
+ if (read_smt_control(buf, sizeof(buf)) &&
+ (!strncmp(buf, "forceoff", 8) || !strncmp(buf, "notsupported", 12)))
+ return false;
+
+ return true;
+}
+
+static inline bool is_smt_on(void)
+{
+ char buf[16];
+
+ if (read_smt_control(buf, sizeof(buf)) && !strncmp(buf, "on", 2))
+ return true;
+
+ return false;
+}
+
void vm_create_irqchip(struct kvm_vm *vm);
static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
diff --git a/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
new file mode 100644
index 000000000000..e43a57d99b56
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/kvm_util_arch.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/loongarch/processor.h b/tools/testing/selftests/kvm/include/loongarch/processor.h
new file mode 100644
index 000000000000..6427a3275e6a
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/processor.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#ifndef __ASSEMBLER__
+#include "ucall_common.h"
+
+#else
+/* general registers */
+#define zero $r0
+#define ra $r1
+#define tp $r2
+#define sp $r3
+#define a0 $r4
+#define a1 $r5
+#define a2 $r6
+#define a3 $r7
+#define a4 $r8
+#define a5 $r9
+#define a6 $r10
+#define a7 $r11
+#define t0 $r12
+#define t1 $r13
+#define t2 $r14
+#define t3 $r15
+#define t4 $r16
+#define t5 $r17
+#define t6 $r18
+#define t7 $r19
+#define t8 $r20
+#define u0 $r21
+#define fp $r22
+#define s0 $r23
+#define s1 $r24
+#define s2 $r25
+#define s3 $r26
+#define s4 $r27
+#define s5 $r28
+#define s6 $r29
+#define s7 $r30
+#define s8 $r31
+#endif
+
+/*
+ * LoongArch page table entry definition
+ * Original header file arch/loongarch/include/asm/loongarch.h
+ */
+#define _PAGE_VALID_SHIFT 0
+#define _PAGE_DIRTY_SHIFT 1
+#define _PAGE_PLV_SHIFT 2 /* 2~3, two bits */
+#define PLV_KERN 0
+#define PLV_USER 3
+#define PLV_MASK 0x3
+#define _CACHE_SHIFT 4 /* 4~5, two bits */
+#define _PAGE_PRESENT_SHIFT 7
+#define _PAGE_WRITE_SHIFT 8
+
+#define _PAGE_VALID BIT_ULL(_PAGE_VALID_SHIFT)
+#define _PAGE_PRESENT BIT_ULL(_PAGE_PRESENT_SHIFT)
+#define _PAGE_WRITE BIT_ULL(_PAGE_WRITE_SHIFT)
+#define _PAGE_DIRTY BIT_ULL(_PAGE_DIRTY_SHIFT)
+#define _PAGE_USER (PLV_USER << _PAGE_PLV_SHIFT)
+#define __READABLE (_PAGE_VALID)
+#define __WRITEABLE (_PAGE_DIRTY | _PAGE_WRITE)
+/* Coherent Cached */
+#define _CACHE_CC BIT_ULL(_CACHE_SHIFT)
+#define PS_4K 0x0000000c
+#define PS_16K 0x0000000e
+#define PS_64K 0x00000010
+#define PS_DEFAULT_SIZE PS_16K
+
+/* LoongArch Basic CSR registers */
+#define LOONGARCH_CSR_CRMD 0x0 /* Current mode info */
+#define CSR_CRMD_PG_SHIFT 4
+#define CSR_CRMD_PG BIT_ULL(CSR_CRMD_PG_SHIFT)
+#define CSR_CRMD_IE_SHIFT 2
+#define CSR_CRMD_IE BIT_ULL(CSR_CRMD_IE_SHIFT)
+#define CSR_CRMD_PLV_SHIFT 0
+#define CSR_CRMD_PLV_WIDTH 2
+#define CSR_CRMD_PLV (0x3UL << CSR_CRMD_PLV_SHIFT)
+#define PLV_MASK 0x3
+#define LOONGARCH_CSR_PRMD 0x1
+#define LOONGARCH_CSR_EUEN 0x2
+#define LOONGARCH_CSR_ECFG 0x4
+#define LOONGARCH_CSR_ESTAT 0x5 /* Exception status */
+#define LOONGARCH_CSR_ERA 0x6 /* ERA */
+#define LOONGARCH_CSR_BADV 0x7 /* Bad virtual address */
+#define LOONGARCH_CSR_EENTRY 0xc
+#define LOONGARCH_CSR_TLBIDX 0x10 /* TLB Index, EHINV, PageSize */
+#define CSR_TLBIDX_PS_SHIFT 24
+#define CSR_TLBIDX_PS_WIDTH 6
+#define CSR_TLBIDX_PS (0x3fUL << CSR_TLBIDX_PS_SHIFT)
+#define CSR_TLBIDX_SIZEM 0x3f000000
+#define CSR_TLBIDX_SIZE CSR_TLBIDX_PS_SHIFT
+#define LOONGARCH_CSR_ASID 0x18 /* ASID */
+#define LOONGARCH_CSR_PGDL 0x19
+#define LOONGARCH_CSR_PGDH 0x1a
+/* Page table base */
+#define LOONGARCH_CSR_PGD 0x1b
+#define LOONGARCH_CSR_PWCTL0 0x1c
+#define LOONGARCH_CSR_PWCTL1 0x1d
+#define LOONGARCH_CSR_STLBPGSIZE 0x1e
+#define LOONGARCH_CSR_CPUID 0x20
+#define LOONGARCH_CSR_KS0 0x30
+#define LOONGARCH_CSR_KS1 0x31
+#define LOONGARCH_CSR_TMID 0x40
+#define LOONGARCH_CSR_TCFG 0x41
+/* TLB refill exception entry */
+#define LOONGARCH_CSR_TLBRENTRY 0x88
+#define LOONGARCH_CSR_TLBRSAVE 0x8b
+#define LOONGARCH_CSR_TLBREHI 0x8e
+#define CSR_TLBREHI_PS_SHIFT 0
+#define CSR_TLBREHI_PS (0x3fUL << CSR_TLBREHI_PS_SHIFT)
+
+#define EXREGS_GPRS (32)
+
+#ifndef __ASSEMBLER__
+void handle_tlb_refill(void);
+void handle_exception(void);
+
+struct ex_regs {
+ unsigned long regs[EXREGS_GPRS];
+ unsigned long pc;
+ unsigned long estat;
+ unsigned long badv;
+};
+
+#define PC_OFFSET_EXREGS offsetof(struct ex_regs, pc)
+#define ESTAT_OFFSET_EXREGS offsetof(struct ex_regs, estat)
+#define BADV_OFFSET_EXREGS offsetof(struct ex_regs, badv)
+#define EXREGS_SIZE sizeof(struct ex_regs)
+
+#else
+#define PC_OFFSET_EXREGS ((EXREGS_GPRS + 0) * 8)
+#define ESTAT_OFFSET_EXREGS ((EXREGS_GPRS + 1) * 8)
+#define BADV_OFFSET_EXREGS ((EXREGS_GPRS + 2) * 8)
+#define EXREGS_SIZE ((EXREGS_GPRS + 3) * 8)
+#endif
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/loongarch/ucall.h b/tools/testing/selftests/kvm/include/loongarch/ucall.h
new file mode 100644
index 000000000000..4ec801f37f00
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/loongarch/ucall.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_MMIO
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+extern vm_vaddr_t *ucall_exit_mmio_addr;
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ WRITE_ONCE(*ucall_exit_mmio_addr, uc);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/lru_gen_util.h b/tools/testing/selftests/kvm/include/lru_gen_util.h
new file mode 100644
index 000000000000..d32ff5d8ffd0
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/lru_gen_util.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tools for integrating with lru_gen, like parsing the lru_gen debugfs output.
+ *
+ * Copyright (C) 2025, Google LLC.
+ */
+#ifndef SELFTEST_KVM_LRU_GEN_UTIL_H
+#define SELFTEST_KVM_LRU_GEN_UTIL_H
+
+#include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+
+#include "test_util.h"
+
+#define MAX_NR_GENS 16 /* MAX_NR_GENS in include/linux/mmzone.h */
+#define MAX_NR_NODES 4 /* Maximum number of nodes supported by the test */
+
+#define LRU_GEN_DEBUGFS "/sys/kernel/debug/lru_gen"
+#define LRU_GEN_ENABLED_PATH "/sys/kernel/mm/lru_gen/enabled"
+#define LRU_GEN_ENABLED 1
+#define LRU_GEN_MM_WALK 2
+
+struct generation_stats {
+ int gen;
+ long age_ms;
+ long nr_anon;
+ long nr_file;
+};
+
+struct node_stats {
+ int node;
+ int nr_gens; /* Number of populated gens entries. */
+ struct generation_stats gens[MAX_NR_GENS];
+};
+
+struct memcg_stats {
+ unsigned long memcg_id;
+ int nr_nodes; /* Number of populated nodes entries. */
+ struct node_stats nodes[MAX_NR_NODES];
+};
+
+void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg);
+long lru_gen_sum_memcg_stats(const struct memcg_stats *stats);
+long lru_gen_sum_memcg_stats_for_gen(int gen, const struct memcg_stats *stats);
+void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg);
+int lru_gen_find_generation(const struct memcg_stats *stats,
+ unsigned long total_pages);
+bool lru_gen_usable(void);
+
+#endif /* SELFTEST_KVM_LRU_GEN_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
index 5f389166338c..162f303d9daa 100644
--- a/tools/testing/selftests/kvm/include/riscv/processor.h
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -11,6 +11,19 @@
#include <asm/csr.h>
#include "kvm_util.h"
+#define INSN_OPCODE_MASK 0x007c
+#define INSN_OPCODE_SHIFT 2
+#define INSN_OPCODE_SYSTEM 28
+
+#define INSN_MASK_FUNCT3 0x7000
+#define INSN_SHIFT_FUNCT3 12
+
+#define INSN_CSR_MASK 0xfff00000
+#define INSN_CSR_SHIFT 20
+
+#define GET_RM(insn) (((insn) & INSN_MASK_FUNCT3) >> INSN_SHIFT_FUNCT3)
+#define GET_CSR_NUM(insn) (((insn) & INSN_CSR_MASK) >> INSN_CSR_SHIFT)
+
static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
uint64_t idx, uint64_t size)
{
@@ -60,7 +73,8 @@ static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext)
return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext));
}
-struct ex_regs {
+struct pt_regs {
+ unsigned long epc;
unsigned long ra;
unsigned long sp;
unsigned long gp;
@@ -92,16 +106,19 @@ struct ex_regs {
unsigned long t4;
unsigned long t5;
unsigned long t6;
- unsigned long epc;
+ /* Supervisor/Machine CSRs */
unsigned long status;
+ unsigned long badaddr;
unsigned long cause;
+ /* a0 value before the syscall */
+ unsigned long orig_a0;
};
#define NR_VECTORS 2
#define NR_EXCEPTIONS 32
#define EC_MASK (NR_EXCEPTIONS - 1)
-typedef void(*exception_handler_fn)(struct ex_regs *);
+typedef void(*exception_handler_fn)(struct pt_regs *);
void vm_init_vector_tables(struct kvm_vm *vm);
void vcpu_init_vector_tables(struct kvm_vcpu *vcpu);
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 77d13d7920cb..c6ef895fbd9a 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -153,6 +153,7 @@ bool is_backing_src_hugetlb(uint32_t i);
void backing_src_help(const char *flag);
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
long get_run_delay(void);
+bool is_numa_balancing_enabled(void);
/*
* Whether or not the given source type is shared memory (as opposed to
diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h
index 32ab6ca7ec32..b11b5a53ebd5 100644
--- a/tools/testing/selftests/kvm/include/x86/processor.h
+++ b/tools/testing/selftests/kvm/include/x86/processor.h
@@ -203,6 +203,7 @@ struct kvm_x86_cpu_feature {
#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30)
#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
+#define X86_FEATURE_SEV_SNP KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 4)
#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0)
#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2)
diff --git a/tools/testing/selftests/kvm/include/x86/sev.h b/tools/testing/selftests/kvm/include/x86/sev.h
index 82c11c81a956..008b4169f5e2 100644
--- a/tools/testing/selftests/kvm/include/x86/sev.h
+++ b/tools/testing/selftests/kvm/include/x86/sev.h
@@ -25,19 +25,51 @@ enum sev_guest_state {
#define SEV_POLICY_NO_DBG (1UL << 0)
#define SEV_POLICY_ES (1UL << 2)
+#define SNP_POLICY_SMT (1ULL << 16)
+#define SNP_POLICY_RSVD_MBO (1ULL << 17)
+#define SNP_POLICY_DBG (1ULL << 19)
+
#define GHCB_MSR_TERM_REQ 0x100
+static inline bool is_sev_snp_vm(struct kvm_vm *vm)
+{
+ return vm->type == KVM_X86_SNP_VM;
+}
+
+static inline bool is_sev_es_vm(struct kvm_vm *vm)
+{
+ return is_sev_snp_vm(vm) || vm->type == KVM_X86_SEV_ES_VM;
+}
+
+static inline bool is_sev_vm(struct kvm_vm *vm)
+{
+ return is_sev_es_vm(vm) || vm->type == KVM_X86_SEV_VM;
+}
+
void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
void sev_vm_launch_finish(struct kvm_vm *vm);
+void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy);
+void snp_vm_launch_update(struct kvm_vm *vm);
+void snp_vm_launch_finish(struct kvm_vm *vm);
struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
struct kvm_vcpu **cpu);
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement);
+void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement);
kvm_static_assert(SEV_RET_SUCCESS == 0);
/*
+ * A SEV-SNP VM requires the policy reserved bit to always be set.
+ * The SMT policy bit is also required to be set based on SMT being
+ * available and active on the system.
+ */
+static inline u64 snp_default_policy(void)
+{
+ return SNP_POLICY_RSVD_MBO | (is_smt_on() ? SNP_POLICY_SMT : 0);
+}
+
+/*
* The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long"
* instead of a proper struct. The size of the parameter is embedded in the
* ioctl number, i.e. is ABI and thus immutable. Hack around the mess by
@@ -70,6 +102,12 @@ kvm_static_assert(SEV_RET_SUCCESS == 0);
void sev_vm_init(struct kvm_vm *vm);
void sev_es_vm_init(struct kvm_vm *vm);
+void snp_vm_init(struct kvm_vm *vm);
+
+static inline void vmgexit(void)
+{
+ __asm__ __volatile__("rep; vmmcall");
+}
static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
struct userspace_mem_region *region)
@@ -93,4 +131,17 @@ static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data);
}
+static inline void snp_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
+ uint64_t hva, uint64_t size, uint8_t type)
+{
+ struct kvm_sev_snp_launch_update update_data = {
+ .uaddr = hva,
+ .gfn_start = gpa >> PAGE_SHIFT,
+ .len = size,
+ .type = type,
+ };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_UPDATE, &update_data);
+}
+
#endif /* SELFTEST_KVM_SEV_H */
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 815bc45dd8dc..a055343a7bf7 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -222,6 +222,7 @@ const char *vm_guest_mode_string(uint32_t i)
[VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages",
[VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages",
[VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages",
+ [VM_MODE_P47V47_16K] = "PA-bits:47, VA-bits:47, 16K pages",
[VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages",
};
_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
@@ -248,6 +249,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
[VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 },
[VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 },
[VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 },
+ [VM_MODE_P47V47_16K] = { 47, 47, 0x4000, 14 },
[VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
@@ -319,6 +321,7 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
case VM_MODE_P36V48_16K:
vm->pgtable_levels = 4;
break;
+ case VM_MODE_P47V47_16K:
case VM_MODE_P36V47_16K:
vm->pgtable_levels = 3;
break;
@@ -444,6 +447,15 @@ void kvm_set_files_rlimit(uint32_t nr_vcpus)
}
+static bool is_guest_memfd_required(struct vm_shape shape)
+{
+#ifdef __x86_64__
+ return shape.type == KVM_X86_SNP_VM;
+#else
+ return false;
+#endif
+}
+
struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
uint64_t nr_extra_pages)
{
@@ -451,7 +463,7 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
nr_extra_pages);
struct userspace_mem_region *slot0;
struct kvm_vm *vm;
- int i;
+ int i, flags;
kvm_set_files_rlimit(nr_runnable_vcpus);
@@ -460,7 +472,15 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
vm = ____vm_create(shape);
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
+ /*
+ * Force GUEST_MEMFD for the primary memory region if necessary, e.g.
+ * for CoCo VMs that require GUEST_MEMFD backed private memory.
+ */
+ flags = 0;
+ if (is_guest_memfd_required(shape))
+ flags |= KVM_MEM_GUEST_MEMFD;
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, flags);
for (i = 0; i < NR_MEM_REGIONS; i++)
vm->memslots[i] = 0;
diff --git a/tools/testing/selftests/kvm/lib/loongarch/exception.S b/tools/testing/selftests/kvm/lib/loongarch/exception.S
new file mode 100644
index 000000000000..88bfa505c6f5
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/exception.S
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "processor.h"
+
+/* address of refill exception should be 4K aligned */
+.balign 4096
+.global handle_tlb_refill
+handle_tlb_refill:
+ csrwr t0, LOONGARCH_CSR_TLBRSAVE
+ csrrd t0, LOONGARCH_CSR_PGD
+ lddir t0, t0, 3
+ lddir t0, t0, 1
+ ldpte t0, 0
+ ldpte t0, 1
+ tlbfill
+ csrrd t0, LOONGARCH_CSR_TLBRSAVE
+ ertn
+
+ /*
+ * save and restore all gprs except base register,
+ * and default value of base register is sp ($r3).
+ */
+.macro save_gprs base
+ .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ st.d $r\n, \base, 8 * \n
+ .endr
+.endm
+
+.macro restore_gprs base
+ .irp n,1,2,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+ ld.d $r\n, \base, 8 * \n
+ .endr
+.endm
+
+/* address of general exception should be 4K aligned */
+.balign 4096
+.global handle_exception
+handle_exception:
+ csrwr sp, LOONGARCH_CSR_KS0
+ csrrd sp, LOONGARCH_CSR_KS1
+ addi.d sp, sp, -EXREGS_SIZE
+
+ save_gprs sp
+ /* save sp register to stack */
+ csrrd t0, LOONGARCH_CSR_KS0
+ st.d t0, sp, 3 * 8
+
+ csrrd t0, LOONGARCH_CSR_ERA
+ st.d t0, sp, PC_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_ESTAT
+ st.d t0, sp, ESTAT_OFFSET_EXREGS
+ csrrd t0, LOONGARCH_CSR_BADV
+ st.d t0, sp, BADV_OFFSET_EXREGS
+
+ or a0, sp, zero
+ bl route_exception
+ restore_gprs sp
+ csrrd sp, LOONGARCH_CSR_KS0
+ ertn
diff --git a/tools/testing/selftests/kvm/lib/loongarch/processor.c b/tools/testing/selftests/kvm/lib/loongarch/processor.c
new file mode 100644
index 000000000000..0ac1abcb71cb
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/processor.c
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <assert.h>
+#include <linux/compiler.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "ucall_common.h"
+
+#define LOONGARCH_PAGE_TABLE_PHYS_MIN 0x200000
+#define LOONGARCH_GUEST_STACK_VADDR_MIN 0x200000
+
+static vm_paddr_t invalid_pgtable[4];
+
+static uint64_t virt_pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
+{
+ unsigned int shift;
+ uint64_t mask;
+
+ shift = level * (vm->page_shift - 3) + vm->page_shift;
+ mask = (1UL << (vm->page_shift - 3)) - 1;
+ return (gva >> shift) & mask;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+ return entry & ~((0x1UL << vm->page_shift) - 1);
+}
+
+static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+{
+ return 1 << (vm->page_shift - 3);
+}
+
+static void virt_set_pgtable(struct kvm_vm *vm, vm_paddr_t table, vm_paddr_t child)
+{
+ uint64_t *ptep;
+ int i, ptrs_per_pte;
+
+ ptep = addr_gpa2hva(vm, table);
+ ptrs_per_pte = 1 << (vm->page_shift - 3);
+ for (i = 0; i < ptrs_per_pte; i++)
+ WRITE_ONCE(*(ptep + i), child);
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+ int i;
+ vm_paddr_t child, table;
+
+ if (vm->pgd_created)
+ return;
+
+ child = table = 0;
+ for (i = 0; i < vm->pgtable_levels; i++) {
+ invalid_pgtable[i] = child;
+ table = vm_phy_page_alloc(vm, LOONGARCH_PAGE_TABLE_PHYS_MIN,
+ vm->memslots[MEM_REGION_PT]);
+ TEST_ASSERT(table, "Fail to allocate page tale at level %d\n", i);
+ virt_set_pgtable(vm, table, child);
+ child = table;
+ }
+ vm->pgd = table;
+ vm->pgd_created = true;
+}
+
+static int virt_pte_none(uint64_t *ptep, int level)
+{
+ return *ptep == invalid_pgtable[level];
+}
+
+static uint64_t *virt_populate_pte(struct kvm_vm *vm, vm_vaddr_t gva, int alloc)
+{
+ int level;
+ uint64_t *ptep;
+ vm_paddr_t child;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+
+ child = vm->pgd;
+ level = vm->pgtable_levels - 1;
+ while (level > 0) {
+ ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8;
+ if (virt_pte_none(ptep, level)) {
+ if (alloc) {
+ child = vm_alloc_page_table(vm);
+ virt_set_pgtable(vm, child, invalid_pgtable[level - 1]);
+ WRITE_ONCE(*ptep, child);
+ } else
+ goto unmapped_gva;
+
+ } else
+ child = pte_addr(vm, *ptep);
+ level--;
+ }
+
+ ptep = addr_gpa2hva(vm, child) + virt_pte_index(vm, gva, level) * 8;
+ return ptep;
+
+unmapped_gva:
+ TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
+ exit(EXIT_FAILURE);
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep;
+
+ ptep = virt_populate_pte(vm, gva, 0);
+ TEST_ASSERT(*ptep != 0, "Virtual address vaddr: 0x%lx not mapped\n", gva);
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ uint32_t prot_bits;
+ uint64_t *ptep;
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ "vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ "paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ "paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ ptep = virt_populate_pte(vm, vaddr, 1);
+ prot_bits = _PAGE_PRESENT | __READABLE | __WRITEABLE | _CACHE_CC | _PAGE_USER;
+ WRITE_ONCE(*ptep, paddr | prot_bits);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
+{
+ uint64_t pte, *ptep;
+ static const char * const type[] = { "pte", "pmd", "pud", "pgd"};
+
+ if (level < 0)
+ return;
+
+ for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+ ptep = addr_gpa2hva(vm, pte);
+ if (virt_pte_none(ptep, level))
+ continue;
+ fprintf(stream, "%*s%s: %lx: %lx at %p\n",
+ indent, "", type[level], pte, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level--);
+ }
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ int level;
+
+ if (!vm->pgd_created)
+ return;
+
+ level = vm->pgtable_levels - 1;
+ pte_dump(stream, vm, indent, vm->pgd, level);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
+ return;
+
+ TEST_FAIL("Unexpected exception (pc:0x%lx, estat:0x%lx, badv:0x%lx)",
+ uc.args[0], uc.args[1], uc.args[2]);
+}
+
+void route_exception(struct ex_regs *regs)
+{
+ unsigned long pc, estat, badv;
+
+ pc = regs->pc;
+ badv = regs->badv;
+ estat = regs->estat;
+ ucall(UCALL_UNHANDLED, 3, pc, estat, badv);
+ while (1) ;
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+ int i;
+ va_list ap;
+ struct kvm_regs regs;
+
+ TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+ "num: %u\n", num);
+
+ vcpu_regs_get(vcpu, &regs);
+
+ va_start(ap, num);
+ for (i = 0; i < num; i++)
+ regs.gpr[i + 4] = va_arg(ap, uint64_t);
+ va_end(ap);
+
+ vcpu_regs_set(vcpu, &regs);
+}
+
+static void loongarch_get_csr(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+{
+ uint64_t csrid;
+
+ csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id;
+ __vcpu_get_reg(vcpu, csrid, addr);
+}
+
+static void loongarch_set_csr(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ uint64_t csrid;
+
+ csrid = KVM_REG_LOONGARCH_CSR | KVM_REG_SIZE_U64 | 8 * id;
+ __vcpu_set_reg(vcpu, csrid, val);
+}
+
+static void loongarch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ int width;
+ unsigned long val;
+ struct kvm_vm *vm = vcpu->vm;
+
+ switch (vm->mode) {
+ case VM_MODE_P36V47_16K:
+ case VM_MODE_P47V47_16K:
+ break;
+
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ /* user mode and page enable mode */
+ val = PLV_USER | CSR_CRMD_PG;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_CRMD, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PRMD, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_EUEN, 1);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_ECFG, 0);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TCFG, 0);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_ASID, 1);
+
+ val = 0;
+ width = vm->page_shift - 3;
+
+ switch (vm->pgtable_levels) {
+ case 4:
+ /* pud page shift and width */
+ val = (vm->page_shift + width * 2) << 20 | (width << 25);
+ /* fall throuth */
+ case 3:
+ /* pmd page shift and width */
+ val |= (vm->page_shift + width) << 10 | (width << 15);
+ /* pte page shift and width */
+ val |= vm->page_shift | width << 5;
+ break;
+ default:
+ TEST_FAIL("Got %u page table levels, expected 3 or 4", vm->pgtable_levels);
+ }
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL0, val);
+
+ /* PGD page shift and width */
+ val = (vm->page_shift + width * (vm->pgtable_levels - 1)) | width << 6;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PWCTL1, val);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_PGDL, vm->pgd);
+
+ /*
+ * Refill exception runs on real mode
+ * Entry address should be physical address
+ */
+ val = addr_gva2gpa(vm, (unsigned long)handle_tlb_refill);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBRENTRY, val);
+
+ /*
+ * General exception runs on page-enabled mode
+ * Entry address should be virtual address
+ */
+ val = (unsigned long)handle_exception;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_EENTRY, val);
+
+ loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBIDX, &val);
+ val &= ~CSR_TLBIDX_SIZEM;
+ val |= PS_DEFAULT_SIZE << CSR_TLBIDX_SIZE;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBIDX, val);
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_STLBPGSIZE, PS_DEFAULT_SIZE);
+
+ /* LOONGARCH_CSR_KS1 is used for exception stack */
+ val = __vm_vaddr_alloc(vm, vm->page_size,
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+ TEST_ASSERT(val != 0, "No memory for exception stack");
+ val = val + vm->page_size;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_KS1, val);
+
+ loongarch_get_csr(vcpu, LOONGARCH_CSR_TLBREHI, &val);
+ val &= ~CSR_TLBREHI_PS;
+ val |= PS_DEFAULT_SIZE << CSR_TLBREHI_PS_SHIFT;
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TLBREHI, val);
+
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_CPUID, vcpu->id);
+ loongarch_set_csr(vcpu, LOONGARCH_CSR_TMID, vcpu->id);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ size_t stack_size;
+ uint64_t stack_vaddr;
+ struct kvm_regs regs;
+ struct kvm_vcpu *vcpu;
+
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+ stack_size = vm->page_size;
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ LOONGARCH_GUEST_STACK_VADDR_MIN, MEM_REGION_DATA);
+ TEST_ASSERT(stack_vaddr != 0, "No memory for vm stack");
+
+ loongarch_vcpu_setup(vcpu);
+ /* Setup guest general purpose registers */
+ vcpu_regs_get(vcpu, &regs);
+ regs.gpr[3] = stack_vaddr + stack_size;
+ vcpu_regs_set(vcpu, &regs);
+
+ return vcpu;
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ struct kvm_regs regs;
+
+ /* Setup guest PC register */
+ vcpu_regs_get(vcpu, &regs);
+ regs.pc = (uint64_t)guest_code;
+ vcpu_regs_set(vcpu, &regs);
+}
diff --git a/tools/testing/selftests/kvm/lib/loongarch/ucall.c b/tools/testing/selftests/kvm/lib/loongarch/ucall.c
new file mode 100644
index 000000000000..fc6cbb50573f
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/loongarch/ucall.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ */
+#include "kvm_util.h"
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+vm_vaddr_t *ucall_exit_mmio_addr;
+
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+ vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
+
+ virt_map(vm, mmio_gva, mmio_gpa, 1);
+
+ vm->ucall_mmio_addr = mmio_gpa;
+
+ write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
+}
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (run->exit_reason == KVM_EXIT_MMIO &&
+ run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
+ TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
+ "Unexpected ucall exit mmio address access");
+
+ return (void *)(*((uint64_t *)run->mmio.data));
+ }
+
+ return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/lru_gen_util.c b/tools/testing/selftests/kvm/lib/lru_gen_util.c
new file mode 100644
index 000000000000..46a14fd63d9e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/lru_gen_util.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025, Google LLC.
+ */
+
+#include <time.h>
+
+#include "lru_gen_util.h"
+
+/*
+ * Tracks state while we parse memcg lru_gen stats. The file we're parsing is
+ * structured like this (some extra whitespace elided):
+ *
+ * memcg (id) (path)
+ * node (id)
+ * (gen_nr) (age_in_ms) (nr_anon_pages) (nr_file_pages)
+ */
+struct memcg_stats_parse_context {
+ bool consumed; /* Whether or not this line was consumed */
+ /* Next parse handler to invoke */
+ void (*next_handler)(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+ int current_node_idx; /* Current index in nodes array */
+ const char *name; /* The name of the memcg we're looking for */
+};
+
+static void memcg_stats_handle_searching(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+static void memcg_stats_handle_in_memcg(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+static void memcg_stats_handle_in_node(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line);
+
+struct split_iterator {
+ char *str;
+ char *save;
+};
+
+static char *split_next(struct split_iterator *it)
+{
+ char *ret = strtok_r(it->str, " \t\n\r", &it->save);
+
+ it->str = NULL;
+ return ret;
+}
+
+static void memcg_stats_handle_searching(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ struct split_iterator it = { .str = line };
+ char *prefix = split_next(&it);
+ char *memcg_id = split_next(&it);
+ char *memcg_name = split_next(&it);
+ char *end;
+
+ ctx->consumed = true;
+
+ if (!prefix || strcmp("memcg", prefix))
+ return; /* Not a memcg line (maybe empty), skip */
+
+ TEST_ASSERT(memcg_id && memcg_name,
+ "malformed memcg line; no memcg id or memcg_name");
+
+ if (strcmp(memcg_name + 1, ctx->name))
+ return; /* Wrong memcg, skip */
+
+ /* Found it! */
+
+ stats->memcg_id = strtoul(memcg_id, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed memcg id '%s'", memcg_id);
+ if (!stats->memcg_id)
+ return; /* Removed memcg? */
+
+ ctx->next_handler = memcg_stats_handle_in_memcg;
+}
+
+static void memcg_stats_handle_in_memcg(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ struct split_iterator it = { .str = line };
+ char *prefix = split_next(&it);
+ char *id = split_next(&it);
+ long found_node_id;
+ char *end;
+
+ ctx->consumed = true;
+ ctx->current_node_idx = -1;
+
+ if (!prefix)
+ return; /* Skip empty lines */
+
+ if (!strcmp("memcg", prefix)) {
+ /* Memcg done, found next one; stop. */
+ ctx->next_handler = NULL;
+ return;
+ } else if (strcmp("node", prefix))
+ TEST_ASSERT(false, "found malformed line after 'memcg ...',"
+ "token: '%s'", prefix);
+
+ /* At this point we know we have a node line. Parse the ID. */
+
+ TEST_ASSERT(id, "malformed node line; no node id");
+
+ found_node_id = strtol(id, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed node id '%s'", id);
+
+ ctx->current_node_idx = stats->nr_nodes++;
+ TEST_ASSERT(ctx->current_node_idx < MAX_NR_NODES,
+ "memcg has stats for too many nodes, max is %d",
+ MAX_NR_NODES);
+ stats->nodes[ctx->current_node_idx].node = found_node_id;
+
+ ctx->next_handler = memcg_stats_handle_in_node;
+}
+
+static void memcg_stats_handle_in_node(struct memcg_stats *stats,
+ struct memcg_stats_parse_context *ctx,
+ char *line)
+{
+ char *my_line = strdup(line);
+ struct split_iterator it = { .str = my_line };
+ char *gen, *age, *nr_anon, *nr_file;
+ struct node_stats *node_stats;
+ struct generation_stats *gen_stats;
+ char *end;
+
+ TEST_ASSERT(it.str, "failed to copy input line");
+
+ gen = split_next(&it);
+
+ if (!gen)
+ goto out_consume; /* Skip empty lines */
+
+ if (!strcmp("memcg", gen) || !strcmp("node", gen)) {
+ /*
+ * Reached next memcg or node section. Don't consume, let the
+ * other handler deal with this.
+ */
+ ctx->next_handler = memcg_stats_handle_in_memcg;
+ goto out;
+ }
+
+ node_stats = &stats->nodes[ctx->current_node_idx];
+ TEST_ASSERT(node_stats->nr_gens < MAX_NR_GENS,
+ "found too many generation lines; max is %d",
+ MAX_NR_GENS);
+ gen_stats = &node_stats->gens[node_stats->nr_gens++];
+
+ age = split_next(&it);
+ nr_anon = split_next(&it);
+ nr_file = split_next(&it);
+
+ TEST_ASSERT(age && nr_anon && nr_file,
+ "malformed generation line; not enough tokens");
+
+ gen_stats->gen = (int)strtol(gen, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed generation number '%s'", gen);
+
+ gen_stats->age_ms = strtol(age, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed generation age '%s'", age);
+
+ gen_stats->nr_anon = strtol(nr_anon, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed anonymous page count '%s'",
+ nr_anon);
+
+ gen_stats->nr_file = strtol(nr_file, &end, 10);
+ TEST_ASSERT(*end == '\0', "malformed file page count '%s'", nr_file);
+
+out_consume:
+ ctx->consumed = true;
+out:
+ free(my_line);
+}
+
+static void print_memcg_stats(const struct memcg_stats *stats, const char *name)
+{
+ int node, gen;
+
+ pr_debug("stats for memcg %s (id %lu):\n", name, stats->memcg_id);
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ pr_debug("\tnode %d\n", stats->nodes[node].node);
+ for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) {
+ const struct generation_stats *gstats =
+ &stats->nodes[node].gens[gen];
+
+ pr_debug("\t\tgen %d\tage_ms %ld"
+ "\tnr_anon %ld\tnr_file %ld\n",
+ gstats->gen, gstats->age_ms, gstats->nr_anon,
+ gstats->nr_file);
+ }
+ }
+}
+
+/* Re-read lru_gen debugfs information for @memcg into @stats. */
+void lru_gen_read_memcg_stats(struct memcg_stats *stats, const char *memcg)
+{
+ FILE *f;
+ ssize_t read = 0;
+ char *line = NULL;
+ size_t bufsz;
+ struct memcg_stats_parse_context ctx = {
+ .next_handler = memcg_stats_handle_searching,
+ .name = memcg,
+ };
+
+ memset(stats, 0, sizeof(struct memcg_stats));
+
+ f = fopen(LRU_GEN_DEBUGFS, "r");
+ TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS);
+
+ while (ctx.next_handler && (read = getline(&line, &bufsz, f)) > 0) {
+ ctx.consumed = false;
+
+ do {
+ ctx.next_handler(stats, &ctx, line);
+ if (!ctx.next_handler)
+ break;
+ } while (!ctx.consumed);
+ }
+
+ if (read < 0 && !feof(f))
+ TEST_ASSERT(false, "getline(%s) failed", LRU_GEN_DEBUGFS);
+
+ TEST_ASSERT(stats->memcg_id > 0, "Couldn't find memcg: %s\n"
+ "Did the memcg get created in the proper mount?",
+ memcg);
+ if (line)
+ free(line);
+ TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS);
+
+ print_memcg_stats(stats, memcg);
+}
+
+/*
+ * Find all pages tracked by lru_gen for this memcg in generation @target_gen.
+ *
+ * If @target_gen is negative, look for all generations.
+ */
+long lru_gen_sum_memcg_stats_for_gen(int target_gen,
+ const struct memcg_stats *stats)
+{
+ int node, gen;
+ long total_nr = 0;
+
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ const struct node_stats *node_stats = &stats->nodes[node];
+
+ for (gen = 0; gen < node_stats->nr_gens; ++gen) {
+ const struct generation_stats *gen_stats =
+ &node_stats->gens[gen];
+
+ if (target_gen >= 0 && gen_stats->gen != target_gen)
+ continue;
+
+ total_nr += gen_stats->nr_anon + gen_stats->nr_file;
+ }
+ }
+
+ return total_nr;
+}
+
+/* Find all pages tracked by lru_gen for this memcg. */
+long lru_gen_sum_memcg_stats(const struct memcg_stats *stats)
+{
+ return lru_gen_sum_memcg_stats_for_gen(-1, stats);
+}
+
+/*
+ * If lru_gen aging should force page table scanning.
+ *
+ * If you want to set this to false, you will need to do eviction
+ * before doing extra aging passes.
+ */
+static const bool force_scan = true;
+
+static void run_aging_impl(unsigned long memcg_id, int node_id, int max_gen)
+{
+ FILE *f = fopen(LRU_GEN_DEBUGFS, "w");
+ char *command;
+ size_t sz;
+
+ TEST_ASSERT(f, "fopen(%s) failed", LRU_GEN_DEBUGFS);
+ sz = asprintf(&command, "+ %lu %d %d 1 %d\n",
+ memcg_id, node_id, max_gen, force_scan);
+ TEST_ASSERT(sz > 0, "creating aging command failed");
+
+ pr_debug("Running aging command: %s", command);
+ if (fwrite(command, sizeof(char), sz, f) < sz) {
+ TEST_ASSERT(false, "writing aging command %s to %s failed",
+ command, LRU_GEN_DEBUGFS);
+ }
+
+ TEST_ASSERT(!fclose(f), "fclose(%s) failed", LRU_GEN_DEBUGFS);
+}
+
+void lru_gen_do_aging(struct memcg_stats *stats, const char *memcg)
+{
+ int node, gen;
+
+ pr_debug("lru_gen: invoking aging...\n");
+
+ /* Must read memcg stats to construct the proper aging command. */
+ lru_gen_read_memcg_stats(stats, memcg);
+
+ for (node = 0; node < stats->nr_nodes; ++node) {
+ int max_gen = 0;
+
+ for (gen = 0; gen < stats->nodes[node].nr_gens; ++gen) {
+ int this_gen = stats->nodes[node].gens[gen].gen;
+
+ max_gen = max_gen > this_gen ? max_gen : this_gen;
+ }
+
+ run_aging_impl(stats->memcg_id, stats->nodes[node].node,
+ max_gen);
+ }
+
+ /* Re-read so callers get updated information */
+ lru_gen_read_memcg_stats(stats, memcg);
+}
+
+/*
+ * Find which generation contains at least @pages pages, assuming that
+ * such a generation exists.
+ */
+int lru_gen_find_generation(const struct memcg_stats *stats,
+ unsigned long pages)
+{
+ int node, gen, gen_idx, min_gen = INT_MAX, max_gen = -1;
+
+ for (node = 0; node < stats->nr_nodes; ++node)
+ for (gen_idx = 0; gen_idx < stats->nodes[node].nr_gens;
+ ++gen_idx) {
+ gen = stats->nodes[node].gens[gen_idx].gen;
+ max_gen = gen > max_gen ? gen : max_gen;
+ min_gen = gen < min_gen ? gen : min_gen;
+ }
+
+ for (gen = min_gen; gen <= max_gen; ++gen)
+ /* See if this generation has enough pages. */
+ if (lru_gen_sum_memcg_stats_for_gen(gen, stats) > pages)
+ return gen;
+
+ return -1;
+}
+
+bool lru_gen_usable(void)
+{
+ long required_features = LRU_GEN_ENABLED | LRU_GEN_MM_WALK;
+ int lru_gen_fd, lru_gen_debug_fd;
+ char mglru_feature_str[8] = {};
+ long mglru_features;
+
+ lru_gen_fd = open(LRU_GEN_ENABLED_PATH, O_RDONLY);
+ if (lru_gen_fd < 0) {
+ puts("lru_gen: Could not open " LRU_GEN_ENABLED_PATH);
+ return false;
+ }
+ if (read(lru_gen_fd, &mglru_feature_str, 7) < 7) {
+ puts("lru_gen: Could not read from " LRU_GEN_ENABLED_PATH);
+ close(lru_gen_fd);
+ return false;
+ }
+ close(lru_gen_fd);
+
+ mglru_features = strtol(mglru_feature_str, NULL, 16);
+ if ((mglru_features & required_features) != required_features) {
+ printf("lru_gen: missing features, got: 0x%lx, expected: 0x%lx\n",
+ mglru_features, required_features);
+ printf("lru_gen: Try 'echo 0x%lx > /sys/kernel/mm/lru_gen/enabled'\n",
+ required_features);
+ return false;
+ }
+
+ lru_gen_debug_fd = open(LRU_GEN_DEBUGFS, O_RDWR);
+ __TEST_REQUIRE(lru_gen_debug_fd >= 0,
+ "lru_gen: Could not open " LRU_GEN_DEBUGFS ", "
+ "but lru_gen is enabled, so cannot use page_idle.");
+ close(lru_gen_debug_fd);
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S
index aa0abd3f35bb..b787b982e922 100644
--- a/tools/testing/selftests/kvm/lib/riscv/handlers.S
+++ b/tools/testing/selftests/kvm/lib/riscv/handlers.S
@@ -10,85 +10,88 @@
#include <asm/csr.h>
.macro save_context
- addi sp, sp, (-8*34)
- sd x1, 0(sp)
- sd x2, 8(sp)
- sd x3, 16(sp)
- sd x4, 24(sp)
- sd x5, 32(sp)
- sd x6, 40(sp)
- sd x7, 48(sp)
- sd x8, 56(sp)
- sd x9, 64(sp)
- sd x10, 72(sp)
- sd x11, 80(sp)
- sd x12, 88(sp)
- sd x13, 96(sp)
- sd x14, 104(sp)
- sd x15, 112(sp)
- sd x16, 120(sp)
- sd x17, 128(sp)
- sd x18, 136(sp)
- sd x19, 144(sp)
- sd x20, 152(sp)
- sd x21, 160(sp)
- sd x22, 168(sp)
- sd x23, 176(sp)
- sd x24, 184(sp)
- sd x25, 192(sp)
- sd x26, 200(sp)
- sd x27, 208(sp)
- sd x28, 216(sp)
- sd x29, 224(sp)
- sd x30, 232(sp)
- sd x31, 240(sp)
+ addi sp, sp, (-8*36)
+ sd x1, 8(sp)
+ sd x2, 16(sp)
+ sd x3, 24(sp)
+ sd x4, 32(sp)
+ sd x5, 40(sp)
+ sd x6, 48(sp)
+ sd x7, 56(sp)
+ sd x8, 64(sp)
+ sd x9, 72(sp)
+ sd x10, 80(sp)
+ sd x11, 88(sp)
+ sd x12, 96(sp)
+ sd x13, 104(sp)
+ sd x14, 112(sp)
+ sd x15, 120(sp)
+ sd x16, 128(sp)
+ sd x17, 136(sp)
+ sd x18, 144(sp)
+ sd x19, 152(sp)
+ sd x20, 160(sp)
+ sd x21, 168(sp)
+ sd x22, 176(sp)
+ sd x23, 184(sp)
+ sd x24, 192(sp)
+ sd x25, 200(sp)
+ sd x26, 208(sp)
+ sd x27, 216(sp)
+ sd x28, 224(sp)
+ sd x29, 232(sp)
+ sd x30, 240(sp)
+ sd x31, 248(sp)
csrr s0, CSR_SEPC
csrr s1, CSR_SSTATUS
- csrr s2, CSR_SCAUSE
- sd s0, 248(sp)
+ csrr s2, CSR_STVAL
+ csrr s3, CSR_SCAUSE
+ sd s0, 0(sp)
sd s1, 256(sp)
sd s2, 264(sp)
+ sd s3, 272(sp)
.endm
.macro restore_context
+ ld s3, 272(sp)
ld s2, 264(sp)
ld s1, 256(sp)
- ld s0, 248(sp)
- csrw CSR_SCAUSE, s2
+ ld s0, 0(sp)
+ csrw CSR_SCAUSE, s3
csrw CSR_SSTATUS, s1
csrw CSR_SEPC, s0
- ld x31, 240(sp)
- ld x30, 232(sp)
- ld x29, 224(sp)
- ld x28, 216(sp)
- ld x27, 208(sp)
- ld x26, 200(sp)
- ld x25, 192(sp)
- ld x24, 184(sp)
- ld x23, 176(sp)
- ld x22, 168(sp)
- ld x21, 160(sp)
- ld x20, 152(sp)
- ld x19, 144(sp)
- ld x18, 136(sp)
- ld x17, 128(sp)
- ld x16, 120(sp)
- ld x15, 112(sp)
- ld x14, 104(sp)
- ld x13, 96(sp)
- ld x12, 88(sp)
- ld x11, 80(sp)
- ld x10, 72(sp)
- ld x9, 64(sp)
- ld x8, 56(sp)
- ld x7, 48(sp)
- ld x6, 40(sp)
- ld x5, 32(sp)
- ld x4, 24(sp)
- ld x3, 16(sp)
- ld x2, 8(sp)
- ld x1, 0(sp)
- addi sp, sp, (8*34)
+ ld x31, 248(sp)
+ ld x30, 240(sp)
+ ld x29, 232(sp)
+ ld x28, 224(sp)
+ ld x27, 216(sp)
+ ld x26, 208(sp)
+ ld x25, 200(sp)
+ ld x24, 192(sp)
+ ld x23, 184(sp)
+ ld x22, 176(sp)
+ ld x21, 168(sp)
+ ld x20, 160(sp)
+ ld x19, 152(sp)
+ ld x18, 144(sp)
+ ld x17, 136(sp)
+ ld x16, 128(sp)
+ ld x15, 120(sp)
+ ld x14, 112(sp)
+ ld x13, 104(sp)
+ ld x12, 96(sp)
+ ld x11, 88(sp)
+ ld x10, 80(sp)
+ ld x9, 72(sp)
+ ld x8, 64(sp)
+ ld x7, 56(sp)
+ ld x6, 48(sp)
+ ld x5, 40(sp)
+ ld x4, 32(sp)
+ ld x3, 24(sp)
+ ld x2, 16(sp)
+ ld x1, 8(sp)
+ addi sp, sp, (8*36)
.endm
.balign 4
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index dd663bcf0cc0..2eac7d4b59e9 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -402,7 +402,7 @@ struct handlers {
exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS];
};
-void route_exception(struct ex_regs *regs)
+void route_exception(struct pt_regs *regs)
{
struct handlers *handlers = (struct handlers *)exception_handlers;
int vector = 0, ec;
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 8ed0b74ae837..03eb99af9b8d 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -132,37 +132,57 @@ void print_skip(const char *fmt, ...)
puts(", skipping test");
}
-bool thp_configured(void)
+static bool test_sysfs_path(const char *path)
{
- int ret;
struct stat statbuf;
+ int ret;
- ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf);
+ ret = stat(path, &statbuf);
TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
- "Error in stating /sys/kernel/mm/transparent_hugepage");
+ "Error in stat()ing '%s'", path);
return ret == 0;
}
-size_t get_trans_hugepagesz(void)
+bool thp_configured(void)
+{
+ return test_sysfs_path("/sys/kernel/mm/transparent_hugepage");
+}
+
+static size_t get_sysfs_val(const char *path)
{
size_t size;
FILE *f;
int ret;
- TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
-
- f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");
- TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size");
+ f = fopen(path, "r");
+ TEST_ASSERT(f, "Error opening '%s'", path);
ret = fscanf(f, "%ld", &size);
+ TEST_ASSERT(ret > 0, "Error reading '%s'", path);
+
+ /* Re-scan the input stream to verify the entire file was read. */
ret = fscanf(f, "%ld", &size);
- TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size");
- fclose(f);
+ TEST_ASSERT(ret < 1, "Error reading '%s'", path);
+ fclose(f);
return size;
}
+size_t get_trans_hugepagesz(void)
+{
+ TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
+
+ return get_sysfs_val("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size");
+}
+
+bool is_numa_balancing_enabled(void)
+{
+ if (!test_sysfs_path("/proc/sys/kernel/numa_balancing"))
+ return false;
+ return get_sysfs_val("/proc/sys/kernel/numa_balancing") == 1;
+}
+
size_t get_def_hugetlb_pagesz(void)
{
char buf[64];
diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c
index bd5a802fa7a5..a92dc1dad085 100644
--- a/tools/testing/selftests/kvm/lib/x86/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86/processor.c
@@ -639,7 +639,7 @@ void kvm_arch_vm_post_create(struct kvm_vm *vm)
sync_global_to_guest(vm, host_cpu_is_amd);
sync_global_to_guest(vm, is_forced_emulation_enabled);
- if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ if (is_sev_vm(vm)) {
struct kvm_sev_init init = { 0 };
vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
@@ -1156,7 +1156,7 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
void kvm_init_vm_address_properties(struct kvm_vm *vm)
{
- if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ if (is_sev_vm(vm)) {
vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
vm->gpa_tag_mask = vm->arch.c_bit;
diff --git a/tools/testing/selftests/kvm/lib/x86/sev.c b/tools/testing/selftests/kvm/lib/x86/sev.c
index e9535ee20b7f..c3a9838f4806 100644
--- a/tools/testing/selftests/kvm/lib/x86/sev.c
+++ b/tools/testing/selftests/kvm/lib/x86/sev.c
@@ -14,7 +14,8 @@
* and find the first range, but that's correct because the condition
* expression would cause us to quit the loop.
*/
-static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region)
+static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region,
+ uint8_t page_type, bool private)
{
const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
const vm_paddr_t gpa_base = region->region.guest_phys_addr;
@@ -24,25 +25,35 @@ static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *regio
if (!sparsebit_any_set(protected_phy_pages))
return;
- sev_register_encrypted_memory(vm, region);
+ if (!is_sev_snp_vm(vm))
+ sev_register_encrypted_memory(vm, region);
sparsebit_for_each_set_range(protected_phy_pages, i, j) {
const uint64_t size = (j - i + 1) * vm->page_size;
const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
- sev_launch_update_data(vm, gpa_base + offset, size);
+ if (private)
+ vm_mem_set_private(vm, gpa_base + offset, size);
+
+ if (is_sev_snp_vm(vm))
+ snp_launch_update_data(vm, gpa_base + offset,
+ (uint64_t)addr_gpa2hva(vm, gpa_base + offset),
+ size, page_type);
+ else
+ sev_launch_update_data(vm, gpa_base + offset, size);
+
}
}
void sev_vm_init(struct kvm_vm *vm)
{
if (vm->type == KVM_X86_DEFAULT_VM) {
- assert(vm->arch.sev_fd == -1);
+ TEST_ASSERT_EQ(vm->arch.sev_fd, -1);
vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
} else {
struct kvm_sev_init init = { 0 };
- assert(vm->type == KVM_X86_SEV_VM);
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_VM);
vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
}
}
@@ -50,16 +61,24 @@ void sev_vm_init(struct kvm_vm *vm)
void sev_es_vm_init(struct kvm_vm *vm)
{
if (vm->type == KVM_X86_DEFAULT_VM) {
- assert(vm->arch.sev_fd == -1);
+ TEST_ASSERT_EQ(vm->arch.sev_fd, -1);
vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
} else {
struct kvm_sev_init init = { 0 };
- assert(vm->type == KVM_X86_SEV_ES_VM);
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SEV_ES_VM);
vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
}
}
+void snp_vm_init(struct kvm_vm *vm)
+{
+ struct kvm_sev_init init = { 0 };
+
+ TEST_ASSERT_EQ(vm->type, KVM_X86_SNP_VM);
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+}
+
void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
{
struct kvm_sev_launch_start launch_start = {
@@ -76,7 +95,7 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
- encrypt_region(vm, region);
+ encrypt_region(vm, region, KVM_SEV_PAGE_TYPE_INVALID, false);
if (policy & SEV_POLICY_ES)
vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
@@ -112,6 +131,33 @@ void sev_vm_launch_finish(struct kvm_vm *vm)
TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
}
+void snp_vm_launch_start(struct kvm_vm *vm, uint64_t policy)
+{
+ struct kvm_sev_snp_launch_start launch_start = {
+ .policy = policy,
+ };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_START, &launch_start);
+}
+
+void snp_vm_launch_update(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *region;
+ int ctr;
+
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
+ encrypt_region(vm, region, KVM_SEV_SNP_PAGE_TYPE_NORMAL, true);
+
+ vm->arch.is_pt_protected = true;
+}
+
+void snp_vm_launch_finish(struct kvm_vm *vm)
+{
+ struct kvm_sev_snp_launch_finish launch_finish = { 0 };
+
+ vm_sev_ioctl(vm, KVM_SEV_SNP_LAUNCH_FINISH, &launch_finish);
+}
+
struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
struct kvm_vcpu **cpu)
{
@@ -128,8 +174,20 @@ struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
return vm;
}
-void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement)
+void vm_sev_launch(struct kvm_vm *vm, uint64_t policy, uint8_t *measurement)
{
+ if (is_sev_snp_vm(vm)) {
+ vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, BIT(KVM_HC_MAP_GPA_RANGE));
+
+ snp_vm_launch_start(vm, policy);
+
+ snp_vm_launch_update(vm);
+
+ snp_vm_launch_finish(vm);
+
+ return;
+ }
+
sev_vm_launch(vm, policy);
if (!measurement)
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
index 9e370800a6a2..f962fefc48fa 100644
--- a/tools/testing/selftests/kvm/riscv/arch_timer.c
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -15,7 +15,7 @@
static int timer_irq = IRQ_S_TIMER;
-static void guest_irq_handler(struct ex_regs *regs)
+static void guest_irq_handler(struct pt_regs *regs)
{
uint64_t xcnt, xcnt_diff_us, cmp;
unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG;
diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c
index cfed6c727bfc..739d17befb5a 100644
--- a/tools/testing/selftests/kvm/riscv/ebreak_test.c
+++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c
@@ -27,7 +27,7 @@ static void guest_code(void)
GUEST_DONE();
}
-static void guest_breakpoint_handler(struct ex_regs *regs)
+static void guest_breakpoint_handler(struct pt_regs *regs)
{
WRITE_ONCE(sw_bp_addr, regs->epc);
regs->epc += 4;
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index 569f2d67c9b8..a0b7dabb5040 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -17,6 +17,15 @@ enum {
VCPU_FEATURE_SBI_EXT,
};
+enum {
+ KVM_RISC_V_REG_OFFSET_VSTART = 0,
+ KVM_RISC_V_REG_OFFSET_VL,
+ KVM_RISC_V_REG_OFFSET_VTYPE,
+ KVM_RISC_V_REG_OFFSET_VCSR,
+ KVM_RISC_V_REG_OFFSET_VLENB,
+ KVM_RISC_V_REG_OFFSET_MAX,
+};
+
static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX];
bool filter_reg(__u64 reg)
@@ -143,6 +152,38 @@ bool check_reject_set(int err)
return err == EINVAL;
}
+static int override_vector_reg_size(struct kvm_vcpu *vcpu, struct vcpu_reg_sublist *s,
+ uint64_t feature)
+{
+ unsigned long vlenb_reg = 0;
+ int rc;
+ u64 reg, size;
+
+ /* Enable V extension so that we can get the vlenb register */
+ rc = __vcpu_set_reg(vcpu, feature, 1);
+ if (rc)
+ return rc;
+
+ vlenb_reg = vcpu_get_reg(vcpu, s->regs[KVM_RISC_V_REG_OFFSET_VLENB]);
+ if (!vlenb_reg) {
+ TEST_FAIL("Can't compute vector register size from zero vlenb\n");
+ return -EPERM;
+ }
+
+ size = __builtin_ctzl(vlenb_reg);
+ size <<= KVM_REG_SIZE_SHIFT;
+
+ for (int i = 0; i < 32; i++) {
+ reg = KVM_REG_RISCV | KVM_REG_RISCV_VECTOR | size | KVM_REG_RISCV_VECTOR_REG(i);
+ s->regs[KVM_RISC_V_REG_OFFSET_MAX + i] = reg;
+ }
+
+ /* We should assert if disabling failed here while enabling succeeded before */
+ vcpu_set_reg(vcpu, feature, 0);
+
+ return 0;
+}
+
void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
{
unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 };
@@ -172,6 +213,13 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
if (!s->feature)
continue;
+ if (s->feature == KVM_RISCV_ISA_EXT_V) {
+ feature = RISCV_ISA_EXT_REG(s->feature);
+ rc = override_vector_reg_size(vcpu, s, feature);
+ if (rc)
+ goto skip;
+ }
+
switch (s->feature_type) {
case VCPU_FEATURE_ISA_EXT:
feature = RISCV_ISA_EXT_REG(s->feature);
@@ -186,6 +234,7 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
/* Try to enable the desired extension */
__vcpu_set_reg(vcpu, feature, 1);
+skip:
/* Double check whether the desired extension was enabled */
__TEST_REQUIRE(__vcpu_has_ext(vcpu, feature),
"%s not available, skipping tests", s->name);
@@ -410,6 +459,35 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id)
return strdup_printf("%lld /* UNKNOWN */", reg_off);
}
+static const char *vector_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct __riscv_v_ext_state */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_VECTOR);
+ int reg_index = 0;
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_VECTOR);
+
+ if (reg_off >= KVM_REG_RISCV_VECTOR_REG(0))
+ reg_index = reg_off - KVM_REG_RISCV_VECTOR_REG(0);
+ switch (reg_off) {
+ case KVM_REG_RISCV_VECTOR_REG(0) ...
+ KVM_REG_RISCV_VECTOR_REG(31):
+ return strdup_printf("KVM_REG_RISCV_VECTOR_REG(%d)", reg_index);
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vstart):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vstart)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vl):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vl)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vtype):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vtype)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vcsr)";
+ case KVM_REG_RISCV_VECTOR_CSR_REG(vlenb):
+ return "KVM_REG_RISCV_VECTOR_CSR_REG(vlenb)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
#define KVM_ISA_EXT_ARR(ext) \
[KVM_RISCV_ISA_EXT_##ext] = "KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_" #ext
@@ -639,6 +717,9 @@ void print_reg(const char *prefix, __u64 id)
case KVM_REG_SIZE_U128:
reg_size = "KVM_REG_SIZE_U128";
break;
+ case KVM_REG_SIZE_U256:
+ reg_size = "KVM_REG_SIZE_U256";
+ break;
default:
printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,\n",
(id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & ~REG_MASK);
@@ -670,6 +751,10 @@ void print_reg(const char *prefix, __u64 id)
printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n",
reg_size, fp_d_id_to_str(prefix, id));
break;
+ case KVM_REG_RISCV_VECTOR:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_VECTOR | %s,\n",
+ reg_size, vector_id_to_str(prefix, id));
+ break;
case KVM_REG_RISCV_ISA_EXT:
printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n",
reg_size, isa_ext_id_to_str(prefix, id));
@@ -874,6 +959,48 @@ static __u64 fp_d_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D,
};
+/* Define a default vector registers with length. This will be overwritten at runtime */
+static __u64 vector_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vstart),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vl),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vtype),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vcsr),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_CSR_REG(vlenb),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(0),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(1),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(2),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(3),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(4),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(5),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(6),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(7),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(8),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(9),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(10),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(11),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(12),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(13),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(14),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(15),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(16),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(17),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(18),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(19),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(20),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(21),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(22),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(23),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(24),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(25),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(26),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(27),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(28),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(29),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(30),
+ KVM_REG_RISCV | KVM_REG_SIZE_U128 | KVM_REG_RISCV_VECTOR | KVM_REG_RISCV_VECTOR_REG(31),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V,
+};
+
#define SUBLIST_BASE \
{"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \
.skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),}
@@ -898,6 +1025,9 @@ static __u64 fp_d_regs[] = {
{"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \
.regs_n = ARRAY_SIZE(fp_d_regs),}
+#define SUBLIST_V \
+ {"v", .feature = KVM_RISCV_ISA_EXT_V, .regs = vector_regs, .regs_n = ARRAY_SIZE(vector_regs),}
+
#define KVM_ISA_EXT_SIMPLE_CONFIG(ext, extu) \
static __u64 regs_##ext[] = { \
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \
@@ -966,6 +1096,7 @@ KVM_SBI_EXT_SIMPLE_CONFIG(susp, SUSP);
KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA);
KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F);
KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D);
+KVM_ISA_EXT_SUBLIST_CONFIG(v, V);
KVM_ISA_EXT_SIMPLE_CONFIG(h, H);
KVM_ISA_EXT_SIMPLE_CONFIG(smnpm, SMNPM);
KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN);
@@ -1040,6 +1171,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_fp_f,
&config_fp_d,
&config_h,
+ &config_v,
&config_smnpm,
&config_smstateen,
&config_sscofpmf,
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
index 03406de4989d..924a335d2262 100644
--- a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -73,7 +73,6 @@ unsigned long pmu_csr_read_num(int csr_num)
switch (csr_num) {
switchcase_csr_read_32(CSR_CYCLE, ret)
- switchcase_csr_read_32(CSR_CYCLEH, ret)
default :
break;
}
@@ -128,17 +127,36 @@ static void stop_counter(unsigned long counter, unsigned long stop_flags)
"Unable to stop counter %ld error %ld\n", counter, ret.error);
}
-static void guest_illegal_exception_handler(struct ex_regs *regs)
+static void guest_illegal_exception_handler(struct pt_regs *regs)
{
+ unsigned long insn;
+ int opcode, csr_num, funct3;
+
__GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL,
"Unexpected exception handler %lx\n", regs->cause);
+ insn = regs->badaddr;
+ opcode = (insn & INSN_OPCODE_MASK) >> INSN_OPCODE_SHIFT;
+ __GUEST_ASSERT(opcode == INSN_OPCODE_SYSTEM,
+ "Unexpected instruction with opcode 0x%x insn 0x%lx\n", opcode, insn);
+
+ csr_num = GET_CSR_NUM(insn);
+ funct3 = GET_RM(insn);
+ /* Validate if it is a CSR read/write operation */
+ __GUEST_ASSERT(funct3 <= 7 && (funct3 != 0 && funct3 != 4),
+ "Unexpected system opcode with funct3 0x%x csr_num 0x%x\n",
+ funct3, csr_num);
+
+ /* Validate if it is a HPMCOUNTER CSR operation */
+ __GUEST_ASSERT((csr_num >= CSR_CYCLE && csr_num <= CSR_HPMCOUNTER31),
+ "Unexpected csr_num 0x%x\n", csr_num);
+
illegal_handler_invoked = true;
/* skip the trapping instruction */
regs->epc += 4;
}
-static void guest_irq_handler(struct ex_regs *regs)
+static void guest_irq_handler(struct pt_regs *regs)
{
unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG;
struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index bc440d5aba57..ce3ac0fd6dfb 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -350,7 +350,7 @@ static void test_invalid_memory_region_flags(void)
struct kvm_vm *vm;
int r, i;
-#if defined __aarch64__ || defined __riscv || defined __x86_64__
+#if defined __aarch64__ || defined __riscv || defined __x86_64__ || defined __loongarch__
supported_flags |= KVM_MEM_READONLY;
#endif
diff --git a/tools/testing/selftests/kvm/x86/fastops_test.c b/tools/testing/selftests/kvm/x86/fastops_test.c
new file mode 100644
index 000000000000..2ac89d6c1e46
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/fastops_test.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Execute a fastop() instruction, with or without forced emulation. BT bit 0
+ * to set RFLAGS.CF based on whether or not the input is even or odd, so that
+ * instructions like ADC and SBB are deterministic.
+ */
+#define guest_execute_fastop_1(FEP, insn, __val, __flags) \
+({ \
+ __asm__ __volatile__("bt $0, %[val]\n\t" \
+ FEP insn " %[val]\n\t" \
+ "pushfq\n\t" \
+ "pop %[flags]\n\t" \
+ : [val]"+r"(__val), [flags]"=r"(__flags) \
+ : : "cc", "memory"); \
+})
+
+#define guest_test_fastop_1(insn, type_t, __val) \
+({ \
+ type_t val = __val, ex_val = __val, input = __val; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_1("", insn, ex_val, ex_flags); \
+ guest_execute_fastop_1(KVM_FEP, insn, val, flags); \
+ \
+ __GUEST_ASSERT(val == ex_val, \
+ "Wanted 0x%lx for '%s 0x%lx', got 0x%lx", \
+ (uint64_t)ex_val, insn, (uint64_t)input, (uint64_t)val); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%lx', got 0x%lx", \
+ ex_flags, insn, (uint64_t)input, flags); \
+})
+
+#define guest_execute_fastop_2(FEP, insn, __input, __output, __flags) \
+({ \
+ __asm__ __volatile__("bt $0, %[output]\n\t" \
+ FEP insn " %[input], %[output]\n\t" \
+ "pushfq\n\t" \
+ "pop %[flags]\n\t" \
+ : [output]"+r"(__output), [flags]"=r"(__flags) \
+ : [input]"r"(__input) : "cc", "memory"); \
+})
+
+#define guest_test_fastop_2(insn, type_t, __val1, __val2) \
+({ \
+ type_t input = __val1, input2 = __val2, output = __val2, ex_output = __val2; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_2("", insn, input, ex_output, ex_flags); \
+ guest_execute_fastop_2(KVM_FEP, insn, input, output, flags); \
+ \
+ __GUEST_ASSERT(output == ex_output, \
+ "Wanted 0x%lx for '%s 0x%lx 0x%lx', got 0x%lx", \
+ (uint64_t)ex_output, insn, (uint64_t)input, \
+ (uint64_t)input2, (uint64_t)output); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%lx, 0x%lx', got 0x%lx", \
+ ex_flags, insn, (uint64_t)input, (uint64_t)input2, flags); \
+})
+
+#define guest_execute_fastop_cl(FEP, insn, __shift, __output, __flags) \
+({ \
+ __asm__ __volatile__("bt $0, %[output]\n\t" \
+ FEP insn " %%cl, %[output]\n\t" \
+ "pushfq\n\t" \
+ "pop %[flags]\n\t" \
+ : [output]"+r"(__output), [flags]"=r"(__flags) \
+ : "c"(__shift) : "cc", "memory"); \
+})
+
+#define guest_test_fastop_cl(insn, type_t, __val1, __val2) \
+({ \
+ type_t output = __val2, ex_output = __val2, input = __val2; \
+ uint8_t shift = __val1; \
+ uint64_t flags, ex_flags; \
+ \
+ guest_execute_fastop_cl("", insn, shift, ex_output, ex_flags); \
+ guest_execute_fastop_cl(KVM_FEP, insn, shift, output, flags); \
+ \
+ __GUEST_ASSERT(output == ex_output, \
+ "Wanted 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \
+ (uint64_t)ex_output, insn, shift, (uint64_t)input, \
+ (uint64_t)output); \
+ __GUEST_ASSERT(flags == ex_flags, \
+ "Wanted flags 0x%lx for '%s 0x%x, 0x%lx', got 0x%lx", \
+ ex_flags, insn, shift, (uint64_t)input, flags); \
+})
+
+static const uint64_t vals[] = {
+ 0,
+ 1,
+ 2,
+ 4,
+ 7,
+ 0x5555555555555555,
+ 0xaaaaaaaaaaaaaaaa,
+ 0xfefefefefefefefe,
+ 0xffffffffffffffff,
+};
+
+#define guest_test_fastops(type_t, suffix) \
+do { \
+ int i, j; \
+ \
+ for (i = 0; i < ARRAY_SIZE(vals); i++) { \
+ guest_test_fastop_1("dec" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("inc" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("neg" suffix, type_t, vals[i]); \
+ guest_test_fastop_1("not" suffix, type_t, vals[i]); \
+ \
+ for (j = 0; j < ARRAY_SIZE(vals); j++) { \
+ guest_test_fastop_2("add" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("adc" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("and" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bsf" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bsr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bt" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("btc" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("btr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("bts" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("cmp" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("imul" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("or" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("sbb" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("sub" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("test" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_2("xor" suffix, type_t, vals[i], vals[j]); \
+ \
+ guest_test_fastop_cl("rol" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("ror" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("rcl" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("rcr" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("sar" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("shl" suffix, type_t, vals[i], vals[j]); \
+ guest_test_fastop_cl("shr" suffix, type_t, vals[i], vals[j]); \
+ } \
+ } \
+} while (0)
+
+static void guest_code(void)
+{
+ guest_test_fastops(uint16_t, "w");
+ guest_test_fastops(uint32_t, "l");
+ guest_test_fastops(uint64_t, "q");
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(is_forced_emulation_enabled);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
index 4e920705681a..c863a689aa98 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
@@ -22,25 +22,6 @@ static void guest_code(void)
{
}
-static bool smt_possible(void)
-{
- char buf[16];
- FILE *f;
- bool res = true;
-
- f = fopen("/sys/devices/system/cpu/smt/control", "r");
- if (f) {
- if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
- if (!strncmp(buf, "forceoff", 8) ||
- !strncmp(buf, "notsupported", 12))
- res = false;
- }
- fclose(f);
- }
-
- return res;
-}
-
static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
{
const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip;
@@ -93,7 +74,7 @@ static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
case 0x40000004:
test_val = entry->eax & (1UL << 18);
- TEST_ASSERT(!!test_val == !smt_possible(),
+ TEST_ASSERT(!!test_val == !is_smt_possible(),
"NoNonArchitecturalCoreSharing bit"
" doesn't reflect SMT setting");
diff --git a/tools/testing/selftests/kvm/x86/kvm_buslock_test.c b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c
new file mode 100644
index 000000000000..d88500c118eb
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86/kvm_buslock_test.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 Advanced Micro Devices, Inc.
+ */
+#include <linux/atomic.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "vmx.h"
+#include "test_util.h"
+
+#define NR_BUS_LOCKS_PER_LEVEL 100
+#define CACHE_LINE_SIZE 64
+
+/*
+ * To generate a bus lock, carve out a buffer that precisely occupies two cache
+ * lines and perform an atomic access that splits the two lines.
+ */
+static u8 buffer[CACHE_LINE_SIZE * 2] __aligned(CACHE_LINE_SIZE);
+static atomic_t *val = (void *)&buffer[CACHE_LINE_SIZE - (sizeof(*val) / 2)];
+
+static void guest_generate_buslocks(void)
+{
+ for (int i = 0; i < NR_BUS_LOCKS_PER_LEVEL; i++)
+ atomic_inc(val);
+}
+
+#define L2_GUEST_STACK_SIZE 64
+
+static void l2_guest_code(void)
+{
+ guest_generate_buslocks();
+ GUEST_DONE();
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ run_guest(vmcb, svm->vmcb_gpa);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code));
+ GUEST_ASSERT(!vmlaunch());
+}
+
+static void guest_code(void *test_data)
+{
+ guest_generate_buslocks();
+
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(test_data);
+ else if (this_cpu_has(X86_FEATURE_VMX))
+ l1_vmx_code(test_data);
+ else
+ GUEST_DONE();
+
+ TEST_FAIL("L2 should have signaled 'done'");
+}
+
+int main(int argc, char *argv[])
+{
+ const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX);
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ int i, bus_locks = 0;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_BUS_LOCK_EXIT));
+
+ vm = vm_create(1);
+ vm_enable_cap(vm, KVM_CAP_X86_BUS_LOCK_EXIT, KVM_BUS_LOCK_DETECTION_EXIT);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ run = vcpu->run;
+
+ for (i = 0; i <= NR_BUS_LOCKS_PER_LEVEL * (1 + has_nested); i++) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ if (run->exit_reason == KVM_EXIT_IO) {
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ goto done;
+ case UCALL_SYNC:
+ continue;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_BUS_LOCK);
+
+ /*
+ * Verify the counter is actually getting incremented, e.g. that
+ * KVM isn't skipping the instruction. On Intel, the exit is
+ * trap-like, i.e. the counter should already have been
+ * incremented. On AMD, it's fault-like, i.e. the counter will
+ * be incremented when the guest re-executes the instruction.
+ */
+ sync_global_from_guest(vm, *val);
+ TEST_ASSERT_EQ(atomic_read(val), bus_locks + host_cpu_is_intel);
+
+ bus_locks++;
+ }
+ TEST_FAIL("Didn't receive UCALL_DONE, took %u bus lock exits\n", bus_locks);
+done:
+ TEST_ASSERT_EQ(i, bus_locks);
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86/sev_init2_tests.c b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
index 3fb967f40c6a..b238615196ad 100644
--- a/tools/testing/selftests/kvm/x86/sev_init2_tests.c
+++ b/tools/testing/selftests/kvm/x86/sev_init2_tests.c
@@ -28,6 +28,7 @@
int kvm_fd;
u64 supported_vmsa_features;
bool have_sev_es;
+bool have_snp;
static int __sev_ioctl(int vm_fd, int cmd_id, void *data)
{
@@ -83,6 +84,9 @@ void test_vm_types(void)
if (have_sev_es)
test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){});
+ if (have_snp)
+ test_init2(KVM_X86_SNP_VM, &(struct kvm_sev_init){});
+
test_init2_invalid(0, &(struct kvm_sev_init){},
"VM type is KVM_X86_DEFAULT_VM");
if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
@@ -138,15 +142,24 @@ int main(int argc, char *argv[])
"sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
+ have_snp = kvm_cpu_has(X86_FEATURE_SEV_SNP);
+ TEST_ASSERT(have_snp == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SNP_VM)),
+ "sev-snp: KVM_CAP_VM_TYPES (%x) indicates SNP support (bit %d), but CPUID does not",
+ kvm_check_cap(KVM_CAP_VM_TYPES), KVM_X86_SNP_VM);
+
test_vm_types();
test_flags(KVM_X86_SEV_VM);
if (have_sev_es)
test_flags(KVM_X86_SEV_ES_VM);
+ if (have_snp)
+ test_flags(KVM_X86_SNP_VM);
test_features(KVM_X86_SEV_VM, 0);
if (have_sev_es)
test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features);
+ if (have_snp)
+ test_features(KVM_X86_SNP_VM, supported_vmsa_features);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
index d97816dc476a..77256c89bb8d 100644
--- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -16,6 +16,18 @@
#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
+static void guest_snp_code(void)
+{
+ uint64_t sev_msr = rdmsr(MSR_AMD64_SEV);
+
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ENABLED);
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_ES_ENABLED);
+ GUEST_ASSERT(sev_msr & MSR_AMD64_SEV_SNP_ENABLED);
+
+ wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+ vmgexit();
+}
+
static void guest_sev_es_code(void)
{
/* TODO: Check CPUID after GHCB-based hypercall support is added. */
@@ -27,7 +39,7 @@ static void guest_sev_es_code(void)
* force "termination" to signal "done" via the GHCB MSR protocol.
*/
wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
- __asm__ __volatile__("rep; vmmcall");
+ vmgexit();
}
static void guest_sev_code(void)
@@ -62,7 +74,7 @@ static void compare_xsave(u8 *from_host, u8 *from_guest)
abort();
}
-static void test_sync_vmsa(uint32_t policy)
+static void test_sync_vmsa(uint32_t type, uint64_t policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
@@ -72,7 +84,7 @@ static void test_sync_vmsa(uint32_t policy)
double x87val = M_PI;
struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 };
- vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu);
+ vm = vm_sev_create_with_one_vcpu(type, guest_code_xsave, &vcpu);
gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
MEM_REGION_TEST_DATA);
hva = addr_gva2hva(vm, gva);
@@ -89,7 +101,7 @@ static void test_sync_vmsa(uint32_t policy)
: "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
vcpu_xsave_set(vcpu, &xsave);
- vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL);
+ vm_sev_launch(vm, policy, NULL);
/* This page is shared, so make it decrypted. */
memset(hva, 0, 4096);
@@ -108,14 +120,12 @@ static void test_sync_vmsa(uint32_t policy)
kvm_vm_free(vm);
}
-static void test_sev(void *guest_code, uint64_t policy)
+static void test_sev(void *guest_code, uint32_t type, uint64_t policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
- uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
-
vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu);
/* TODO: Validate the measurement is as expected. */
@@ -124,7 +134,7 @@ static void test_sev(void *guest_code, uint64_t policy)
for (;;) {
vcpu_run(vcpu);
- if (policy & SEV_POLICY_ES) {
+ if (is_sev_es_vm(vm)) {
TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
"Wanted SYSTEM_EVENT, got %s",
exit_reason_str(vcpu->run->exit_reason));
@@ -161,16 +171,14 @@ static void guest_shutdown_code(void)
__asm__ __volatile__("ud2");
}
-static void test_sev_es_shutdown(void)
+static void test_sev_shutdown(uint32_t type, uint64_t policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- uint32_t type = KVM_X86_SEV_ES_VM;
-
vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu);
- vm_sev_launch(vm, SEV_POLICY_ES, NULL);
+ vm_sev_launch(vm, policy, NULL);
vcpu_run(vcpu);
TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN,
@@ -180,27 +188,42 @@ static void test_sev_es_shutdown(void)
kvm_vm_free(vm);
}
-int main(int argc, char *argv[])
+static void test_sev_smoke(void *guest, uint32_t type, uint64_t policy)
{
const u64 xf_mask = XFEATURE_MASK_X87_AVX;
- TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
-
- test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
- test_sev(guest_sev_code, 0);
+ if (type == KVM_X86_SNP_VM)
+ test_sev(guest, type, policy | SNP_POLICY_DBG);
+ else
+ test_sev(guest, type, policy | SEV_POLICY_NO_DBG);
+ test_sev(guest, type, policy);
- if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
- test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
- test_sev(guest_sev_es_code, SEV_POLICY_ES);
+ if (type == KVM_X86_SEV_VM)
+ return;
- test_sev_es_shutdown();
+ test_sev_shutdown(type, policy);
- if (kvm_has_cap(KVM_CAP_XCRS) &&
- (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
- test_sync_vmsa(0);
- test_sync_vmsa(SEV_POLICY_NO_DBG);
- }
+ if (kvm_has_cap(KVM_CAP_XCRS) &&
+ (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) {
+ test_sync_vmsa(type, policy);
+ if (type == KVM_X86_SNP_VM)
+ test_sync_vmsa(type, policy | SNP_POLICY_DBG);
+ else
+ test_sync_vmsa(type, policy | SEV_POLICY_NO_DBG);
}
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+ test_sev_smoke(guest_sev_code, KVM_X86_SEV_VM, 0);
+
+ if (kvm_cpu_has(X86_FEATURE_SEV_ES))
+ test_sev_smoke(guest_sev_es_code, KVM_X86_SEV_ES_VM, SEV_POLICY_ES);
+
+ if (kvm_cpu_has(X86_FEATURE_SEV_SNP))
+ test_sev_smoke(guest_snp_code, KVM_X86_SNP_VM, snp_default_policy());
return 0;
}
diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h
index b9054086a0c9..18a6014920b5 100644
--- a/tools/testing/selftests/landlock/audit.h
+++ b/tools/testing/selftests/landlock/audit.h
@@ -300,15 +300,22 @@ out:
return err;
}
-static int __maybe_unused matches_log_domain_allocated(int audit_fd,
+static int __maybe_unused matches_log_domain_allocated(int audit_fd, pid_t pid,
__u64 *domain_id)
{
- return audit_match_record(
- audit_fd, AUDIT_LANDLOCK_DOMAIN,
- REGEX_LANDLOCK_PREFIX
- " status=allocated mode=enforcing pid=[0-9]\\+ uid=[0-9]\\+"
- " exe=\"[^\"]\\+\" comm=\".*_test\"$",
- domain_id);
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " status=allocated mode=enforcing pid=%d uid=[0-9]\\+"
+ " exe=\"[^\"]\\+\" comm=\".*_test\"$";
+ char log_match[sizeof(log_template) + 10];
+ int log_match_len;
+
+ log_match_len =
+ snprintf(log_match, sizeof(log_match), log_template, pid);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match,
+ domain_id);
}
static int __maybe_unused matches_log_domain_deallocated(
diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
index a0643070c403..cfc571afd0eb 100644
--- a/tools/testing/selftests/landlock/audit_test.c
+++ b/tools/testing/selftests/landlock/audit_test.c
@@ -9,6 +9,7 @@
#include <errno.h>
#include <limits.h>
#include <linux/landlock.h>
+#include <pthread.h>
#include <stdlib.h>
#include <sys/mount.h>
#include <sys/prctl.h>
@@ -40,7 +41,6 @@ FIXTURE(audit)
{
struct audit_filter audit_filter;
int audit_fd;
- __u64(*domain_stack)[16];
};
FIXTURE_SETUP(audit)
@@ -60,18 +60,10 @@ FIXTURE_SETUP(audit)
TH_LOG("Failed to initialize audit: %s", error_msg);
}
clear_cap(_metadata, CAP_AUDIT_CONTROL);
-
- self->domain_stack = mmap(NULL, sizeof(*self->domain_stack),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS, -1, 0);
- ASSERT_NE(MAP_FAILED, self->domain_stack);
- memset(self->domain_stack, 0, sizeof(*self->domain_stack));
}
FIXTURE_TEARDOWN(audit)
{
- EXPECT_EQ(0, munmap(self->domain_stack, sizeof(*self->domain_stack)));
-
set_cap(_metadata, CAP_AUDIT_CONTROL);
EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter));
clear_cap(_metadata, CAP_AUDIT_CONTROL);
@@ -83,9 +75,15 @@ TEST_F(audit, layers)
.scoped = LANDLOCK_SCOPE_SIGNAL,
};
int status, ruleset_fd, i;
+ __u64(*domain_stack)[16];
__u64 prev_dom = 3;
pid_t child;
+ domain_stack = mmap(NULL, sizeof(*domain_stack), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, domain_stack);
+ memset(domain_stack, 0, sizeof(*domain_stack));
+
ruleset_fd =
landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
@@ -94,7 +92,7 @@ TEST_F(audit, layers)
child = fork();
ASSERT_LE(0, child);
if (child == 0) {
- for (i = 0; i < ARRAY_SIZE(*self->domain_stack); i++) {
+ for (i = 0; i < ARRAY_SIZE(*domain_stack); i++) {
__u64 denial_dom = 1;
__u64 allocated_dom = 2;
@@ -107,7 +105,8 @@ TEST_F(audit, layers)
matches_log_signal(_metadata, self->audit_fd,
getppid(), &denial_dom));
EXPECT_EQ(0, matches_log_domain_allocated(
- self->audit_fd, &allocated_dom));
+ self->audit_fd, getpid(),
+ &allocated_dom));
EXPECT_NE(denial_dom, 1);
EXPECT_NE(denial_dom, 0);
EXPECT_EQ(denial_dom, allocated_dom);
@@ -115,7 +114,7 @@ TEST_F(audit, layers)
/* Checks that the new domain is younger than the previous one. */
EXPECT_GT(allocated_dom, prev_dom);
prev_dom = allocated_dom;
- (*self->domain_stack)[i] = allocated_dom;
+ (*domain_stack)[i] = allocated_dom;
}
/* Checks that we reached the maximum number of layers. */
@@ -142,23 +141,143 @@ TEST_F(audit, layers)
/* Purges log from deallocated domains. */
EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
&audit_tv_dom_drop, sizeof(audit_tv_dom_drop)));
- for (i = ARRAY_SIZE(*self->domain_stack) - 1; i >= 0; i--) {
+ for (i = ARRAY_SIZE(*domain_stack) - 1; i >= 0; i--) {
__u64 deallocated_dom = 2;
EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1,
&deallocated_dom));
- EXPECT_EQ((*self->domain_stack)[i], deallocated_dom)
+ EXPECT_EQ((*domain_stack)[i], deallocated_dom)
{
TH_LOG("Failed to match domain %llx (#%d)",
- (*self->domain_stack)[i], i);
+ (*domain_stack)[i], i);
}
}
+ EXPECT_EQ(0, munmap(domain_stack, sizeof(*domain_stack)));
EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
&audit_tv_default, sizeof(audit_tv_default)));
-
EXPECT_EQ(0, close(ruleset_fd));
}
+struct thread_data {
+ pid_t parent_pid;
+ int ruleset_fd, pipe_child, pipe_parent;
+};
+
+static void *thread_audit_test(void *arg)
+{
+ const struct thread_data *data = (struct thread_data *)arg;
+ uintptr_t err = 0;
+ char buffer;
+
+ /* TGID and TID are different for a second thread. */
+ if (getpid() == gettid()) {
+ err = 1;
+ goto out;
+ }
+
+ if (landlock_restrict_self(data->ruleset_fd, 0)) {
+ err = 2;
+ goto out;
+ }
+
+ if (close(data->ruleset_fd)) {
+ err = 3;
+ goto out;
+ }
+
+ /* Creates a denial to get the domain ID. */
+ if (kill(data->parent_pid, 0) != -1) {
+ err = 4;
+ goto out;
+ }
+
+ if (EPERM != errno) {
+ err = 5;
+ goto out;
+ }
+
+ /* Signals the parent to read denial logs. */
+ if (write(data->pipe_child, ".", 1) != 1) {
+ err = 6;
+ goto out;
+ }
+
+ /* Waits for the parent to update audit filters. */
+ if (read(data->pipe_parent, &buffer, 1) != 1) {
+ err = 7;
+ goto out;
+ }
+
+out:
+ close(data->pipe_child);
+ close(data->pipe_parent);
+ return (void *)err;
+}
+
+/* Checks that the PID tied to a domain is not a TID but the TGID. */
+TEST_F(audit, thread)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ __u64 denial_dom = 1;
+ __u64 allocated_dom = 2;
+ __u64 deallocated_dom = 3;
+ pthread_t thread;
+ int pipe_child[2], pipe_parent[2];
+ char buffer;
+ struct thread_data child_data;
+
+ child_data.parent_pid = getppid();
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ child_data.pipe_child = pipe_child[1];
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ child_data.pipe_parent = pipe_parent[0];
+ child_data.ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, child_data.ruleset_fd);
+
+ /* TGID and TID are the same for the initial thread . */
+ EXPECT_EQ(getpid(), gettid());
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, pthread_create(&thread, NULL, thread_audit_test,
+ &child_data));
+
+ /* Waits for the child to generate a denial. */
+ ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
+ EXPECT_EQ(0, close(pipe_child[0]));
+
+ /* Matches the signal log to get the domain ID. */
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ child_data.parent_pid, &denial_dom));
+ EXPECT_NE(denial_dom, 1);
+ EXPECT_NE(denial_dom, 0);
+
+ EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, getpid(),
+ &allocated_dom));
+ EXPECT_EQ(denial_dom, allocated_dom);
+
+ /* Updates filter rules to match the drop record. */
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE));
+ EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_DEL_RULE));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ /* Signals the thread to exit, which will generate a domain deallocation. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ ASSERT_EQ(0, pthread_join(thread, NULL));
+
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_dom_drop, sizeof(audit_tv_dom_drop)));
+ EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1,
+ &deallocated_dom));
+ EXPECT_EQ(denial_dom, deallocated_dom);
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_default, sizeof(audit_tv_default)));
+}
+
FIXTURE(audit_flags)
{
struct audit_filter audit_filter;
@@ -273,7 +392,8 @@ TEST_F(audit_flags, signal)
/* Checks domain information records. */
EXPECT_EQ(0, matches_log_domain_allocated(
- self->audit_fd, &allocated_dom));
+ self->audit_fd, getpid(),
+ &allocated_dom));
EXPECT_NE(*self->domain_id, 1);
EXPECT_NE(*self->domain_id, 0);
EXPECT_EQ(*self->domain_id, allocated_dom);
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index f819011a8798..73729382d40f 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -5964,7 +5964,8 @@ TEST_F(audit_layout1, refer_handled)
EXPECT_EQ(EXDEV, errno);
EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
dir_s1d1));
- EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, NULL));
+ EXPECT_EQ(0,
+ matches_log_domain_allocated(self->audit_fd, getpid(), NULL));
EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
dir_s1d3));
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index 81a1f64a22e8..377b3699ff31 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -1,2 +1,3 @@
CONFIG_TEST_BITMAP=m
+CONFIG_PRIME_NUMBERS=m
CONFIG_TEST_BITOPS=m
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
index efabfcbe0b49..17ed3e9917ca 100644
--- a/tools/testing/selftests/mincore/mincore_selftest.c
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -283,8 +283,7 @@ out_free:
/*
* Test mincore() behavior on a page backed by a tmpfs file. This test
- * performs the same steps as the previous one. However, we don't expect
- * any readahead in this case.
+ * performs the same steps as the previous one.
*/
TEST(check_tmpfs_mmap)
{
@@ -295,7 +294,6 @@ TEST(check_tmpfs_mmap)
int page_size;
int fd;
int i;
- int ra_pages = 0;
page_size = sysconf(_SC_PAGESIZE);
vec_size = FILE_SIZE / page_size;
@@ -338,8 +336,7 @@ TEST(check_tmpfs_mmap)
}
/*
- * Touch a page in the middle of the mapping. We expect only
- * that page to be fetched into memory.
+ * Touch a page in the middle of the mapping.
*/
addr[FILE_SIZE / 2] = 1;
retval = mincore(addr, FILE_SIZE, vec);
@@ -348,15 +345,6 @@ TEST(check_tmpfs_mmap)
TH_LOG("Page not found in memory after use");
}
- i = FILE_SIZE / 2 / page_size + 1;
- while (i < vec_size && vec[i]) {
- ra_pages++;
- i++;
- }
- ASSERT_EQ(ra_pages, 0) {
- TH_LOG("Read-ahead pages found in memory");
- }
-
munmap(addr, FILE_SIZE);
close(fd);
free(vec);
diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
index 67df7b47087f..e1fe16bcbbe8 100755
--- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
@@ -29,7 +29,7 @@ fi
if [[ $cgroup2 ]]; then
cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}')
if [[ -z "$cgroup_path" ]]; then
- cgroup_path=/dev/cgroup/memory
+ cgroup_path=$(mktemp -d)
mount -t cgroup2 none $cgroup_path
do_umount=1
fi
@@ -37,7 +37,7 @@ if [[ $cgroup2 ]]; then
else
cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
if [[ -z "$cgroup_path" ]]; then
- cgroup_path=/dev/cgroup/memory
+ cgroup_path=$(mktemp -d)
mount -t cgroup memory,hugetlb $cgroup_path
do_umount=1
fi
diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c
index 2c3a0eb6b22d..9bc4591c7b16 100644
--- a/tools/testing/selftests/mm/compaction_test.c
+++ b/tools/testing/selftests/mm/compaction_test.c
@@ -90,6 +90,8 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
int compaction_index = 0;
char nr_hugepages[20] = {0};
char init_nr_hugepages[24] = {0};
+ char target_nr_hugepages[24] = {0};
+ int slen;
snprintf(init_nr_hugepages, sizeof(init_nr_hugepages),
"%lu", initial_nr_hugepages);
@@ -106,11 +108,18 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
goto out;
}
- /* Request a large number of huge pages. The Kernel will allocate
- as much as it can */
- if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
- ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n",
- strerror(errno));
+ /*
+ * Request huge pages for about half of the free memory. The Kernel
+ * will allocate as much as it can, and we expect it will get at least 1/3
+ */
+ nr_hugepages_ul = mem_free / hugepage_size / 2;
+ snprintf(target_nr_hugepages, sizeof(target_nr_hugepages),
+ "%lu", nr_hugepages_ul);
+
+ slen = strlen(target_nr_hugepages);
+ if (write(fd, target_nr_hugepages, slen) != slen) {
+ ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n",
+ nr_hugepages_ul, strerror(errno));
goto close_fd;
}
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index f0cb14ea8608..b6cfe0a4b7df 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -293,7 +293,7 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
.iov_base = mem,
.iov_len = size,
};
- ssize_t cur, total, transferred;
+ ssize_t cur, total, transferred = 0;
struct comm_pipes comm_pipes;
char *old, *new;
int ret, fds[2];
diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c
index b3d0e2771096..eba43ead13ae 100644
--- a/tools/testing/selftests/mm/guard-regions.c
+++ b/tools/testing/selftests/mm/guard-regions.c
@@ -271,12 +271,16 @@ FIXTURE_SETUP(guard_regions)
self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
setup_sighandler();
- if (variant->backing == ANON_BACKED)
+ switch (variant->backing) {
+ case ANON_BACKED:
return;
-
- self->fd = open_file(
- variant->backing == SHMEM_BACKED ? "/tmp/" : "",
- self->path);
+ case LOCAL_FILE_BACKED:
+ self->fd = open_file("", self->path);
+ break;
+ case SHMEM_BACKED:
+ self->fd = memfd_create(self->path, 0);
+ break;
+ }
/* We truncate file to at least 100 pages, tests can modify as needed. */
ASSERT_EQ(ftruncate(self->fd, 100 * self->page_size), 0);
@@ -1696,7 +1700,7 @@ TEST_F(guard_regions, readonly_file)
char *ptr;
int i;
- if (variant->backing == ANON_BACKED)
+ if (variant->backing != LOCAL_FILE_BACKED)
SKIP(return, "Read-only test specific to file-backed");
/* Map shared so we can populate with pattern, populate it, unmap. */
diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
index 11f9bbe7dc22..0b0d4ba1af27 100755
--- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
@@ -23,7 +23,7 @@ fi
if [[ $cgroup2 ]]; then
CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}')
if [[ -z "$CGROUP_ROOT" ]]; then
- CGROUP_ROOT=/dev/cgroup/memory
+ CGROUP_ROOT=$(mktemp -d)
mount -t cgroup2 none $CGROUP_ROOT
do_umount=1
fi
diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h
index 1bad310d282a..17bf2d1b0192 100644
--- a/tools/testing/selftests/mm/pkey-powerpc.h
+++ b/tools/testing/selftests/mm/pkey-powerpc.h
@@ -3,6 +3,8 @@
#ifndef _PKEYS_POWERPC_H
#define _PKEYS_POWERPC_H
+#include <sys/stat.h>
+
#ifndef SYS_pkey_alloc
# define SYS_pkey_alloc 384
# define SYS_pkey_free 385
@@ -102,8 +104,18 @@ static inline void expect_fault_on_read_execonly_key(void *p1, int pkey)
return;
}
+#define REPEAT_8(s) s s s s s s s s
+#define REPEAT_64(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) \
+ REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s)
+#define REPEAT_512(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) \
+ REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s)
+#define REPEAT_4096(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) \
+ REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s)
+#define REPEAT_16384(s) REPEAT_4096(s) REPEAT_4096(s) \
+ REPEAT_4096(s) REPEAT_4096(s)
+
/* 4-byte instructions * 16384 = 64K page */
-#define __page_o_noops() asm(".rept 16384 ; nop; .endr")
+#define __page_o_noops() asm(REPEAT_16384("nop\n"))
static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
{
diff --git a/tools/testing/selftests/mm/pkey_util.c b/tools/testing/selftests/mm/pkey_util.c
index ca4ad0d44ab2..255b332f7a08 100644
--- a/tools/testing/selftests/mm/pkey_util.c
+++ b/tools/testing/selftests/mm/pkey_util.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
+#define __SANE_USERSPACE_TYPES__
#include <sys/syscall.h>
#include <unistd.h>
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 124078b56fa4..70a38f485d4d 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -31,6 +31,7 @@ TEST_PROGS += veth.sh
TEST_PROGS += ioam6.sh
TEST_PROGS += gro.sh
TEST_PROGS += gre_gso.sh
+TEST_PROGS += gre_ipv6_lladdr.sh
TEST_PROGS += cmsg_so_mark.sh
TEST_PROGS += cmsg_so_priority.sh
TEST_PROGS += test_so_rcv.sh
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index b866bab1d92a..c7cea556b416 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -359,6 +359,23 @@ fib_rule6_test()
"$getnomatch" "iif flowlabel masked redirect to table" \
"iif flowlabel masked no redirect to table"
fi
+
+ $IP link show dev $DEV | grep -q vrf0
+ if [ $? -eq 0 ]; then
+ match="oif vrf0"
+ getmatch="oif $DEV"
+ getnomatch="oif lo"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF oif redirect to table" \
+ "VRF oif no redirect to table"
+
+ match="from $SRC_IP6 iif vrf0"
+ getmatch="from $SRC_IP6 iif $DEV"
+ getnomatch="from $SRC_IP6 iif lo"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF iif redirect to table" \
+ "VRF iif no redirect to table"
+ fi
}
fib_rule6_vrf_test()
@@ -635,6 +652,23 @@ fib_rule4_test()
"$getnomatch" "iif dscp masked redirect to table" \
"iif dscp masked no redirect to table"
fi
+
+ $IP link show dev $DEV | grep -q vrf0
+ if [ $? -eq 0 ]; then
+ match="oif vrf0"
+ getmatch="oif $DEV"
+ getnomatch="oif lo"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF oif redirect to table" \
+ "VRF oif no redirect to table"
+
+ match="from $SRC_IP iif vrf0"
+ getmatch="from $SRC_IP iif $DEV"
+ getnomatch="from $SRC_IP iif lo"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF iif redirect to table" \
+ "VRF iif no redirect to table"
+ fi
}
fib_rule4_vrf_test()
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index 90f8a244ea90..e59fba366a0a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid"
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid 8021p drop_untagged"
NUM_NETIFS=4
CHECK_TC="yes"
source lib.sh
@@ -194,6 +194,100 @@ other_tpid()
tc qdisc del dev $h2 clsact
}
+8021p_do()
+{
+ local should_fail=$1; shift
+ local mac=de:ad:be:ef:13:37
+
+ tc filter add dev $h2 ingress protocol all pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ -q $h1 -c 1 -b $mac -a own "81:00 00:00 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa"
+ sleep 1
+
+ tc -j -s filter show dev $h2 ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err_fail $should_fail $? "802.1p-tagged reception"
+
+ tc filter del dev $h2 ingress pref 1
+}
+
+8021p()
+{
+ RET=0
+
+ tc qdisc add dev $h2 clsact
+ ip link set $h2 promisc on
+
+ # Test that with the default_pvid, 1, packets tagged with VID 0 are
+ # accepted.
+ 8021p_do 0
+
+ # Test that packets tagged with VID 0 are still accepted after changing
+ # the default_pvid.
+ ip link set br0 type bridge vlan_default_pvid 10
+ 8021p_do 0
+
+ log_test "Reception of 802.1p-tagged traffic"
+
+ ip link set $h2 promisc off
+ tc qdisc del dev $h2 clsact
+}
+
+send_untagged_and_8021p()
+{
+ ping_do $h1 192.0.2.2
+ check_fail $?
+
+ 8021p_do 1
+}
+
+drop_untagged()
+{
+ RET=0
+
+ tc qdisc add dev $h2 clsact
+ ip link set $h2 promisc on
+
+ # Test that with no PVID, untagged and 802.1p-tagged traffic is
+ # dropped.
+ ip link set br0 type bridge vlan_default_pvid 1
+
+ # First we reconfigure the default_pvid, 1, as a non-PVID VLAN.
+ bridge vlan add dev $swp1 vid 1 untagged
+ send_untagged_and_8021p
+ bridge vlan add dev $swp1 vid 1 pvid untagged
+
+ # Next we try to delete VID 1 altogether
+ bridge vlan del dev $swp1 vid 1
+ send_untagged_and_8021p
+ bridge vlan add dev $swp1 vid 1 pvid untagged
+
+ # Set up the bridge without a default_pvid, then check that the 8021q
+ # module, when the bridge port goes down and then up again, does not
+ # accidentally re-enable untagged packet reception.
+ ip link set br0 type bridge vlan_default_pvid 0
+ ip link set $swp1 down
+ ip link set $swp1 up
+ setup_wait
+ send_untagged_and_8021p
+
+ # Remove swp1 as a bridge port and let it rejoin the bridge while it
+ # has no default_pvid.
+ ip link set $swp1 nomaster
+ ip link set $swp1 master br0
+ send_untagged_and_8021p
+
+ # Restore settings
+ ip link set br0 type bridge vlan_default_pvid 1
+
+ log_test "Dropping of untagged and 802.1p-tagged traffic with no PVID"
+
+ ip link set $h2 promisc off
+ tc qdisc del dev $h2 clsact
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/tc_taprio.sh b/tools/testing/selftests/net/forwarding/tc_taprio.sh
new file mode 100755
index 000000000000..8992aeabfe0b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_taprio.sh
@@ -0,0 +1,421 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS=" \
+ test_clock_jump_backward \
+ test_taprio_after_ptp \
+ test_max_sdu \
+ test_clock_jump_backward_forward \
+"
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+source tsn_lib.sh
+
+require_command python3
+
+# The test assumes the usual topology from the README, where h1 is connected to
+# swp1, h2 to swp2, and swp1 and swp2 are together in a bridge.
+# Additional assumption: h1 and h2 use the same PHC, and so do swp1 and swp2.
+# By synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized to
+# swp1 (and both to CLOCK_REALTIME).
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+UDS_ADDRESS_H1="/var/run/ptp4l_h1"
+UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1"
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# Tunables
+NUM_PKTS=100
+STREAM_VID=10
+STREAM_PRIO_1=6
+STREAM_PRIO_2=5
+STREAM_PRIO_3=4
+# PTP uses TC 0
+ALL_GATES=$((1 << 0 | 1 << STREAM_PRIO_1 | 1 << STREAM_PRIO_2))
+# Use a conservative cycle of 10 ms to allow the test to still pass when the
+# kernel has some extra overhead like lockdep etc
+CYCLE_TIME_NS=10000000
+# Create two Gate Control List entries, one OPEN and one CLOSE, of equal
+# durations
+GATE_DURATION_NS=$((CYCLE_TIME_NS / 2))
+# Give 2/3 of the cycle time to user space and 1/3 to the kernel
+FUDGE_FACTOR=$((CYCLE_TIME_NS / 3))
+# Shift the isochron base time by half the gate time, so that packets are
+# always received by swp1 close to the middle of the time slot, to minimize
+# inaccuracies due to network sync
+SHIFT_TIME_NS=$((GATE_DURATION_NS / 2))
+
+path_delay=
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+switch_create()
+{
+ local h2_mac_addr=$(mac_get $h2)
+
+ ip link set $swp1 up
+ ip link set $swp2 up
+
+ ip link add br0 type bridge vlan_filtering 1
+ ip link set $swp1 master br0
+ ip link set $swp2 master br0
+ ip link set br0 up
+
+ bridge vlan add dev $swp2 vid $STREAM_VID
+ bridge vlan add dev $swp1 vid $STREAM_VID
+ bridge fdb add dev $swp2 \
+ $h2_mac_addr vlan $STREAM_VID static master
+}
+
+switch_destroy()
+{
+ ip link del br0
+}
+
+ptp_setup()
+{
+ # Set up swp1 as a master PHC for h1, synchronized to the local
+ # CLOCK_REALTIME.
+ phc2sys_start $UDS_ADDRESS_SWP1
+ ptp4l_start $h1 true $UDS_ADDRESS_H1
+ ptp4l_start $swp1 false $UDS_ADDRESS_SWP1
+}
+
+ptp_cleanup()
+{
+ ptp4l_stop $swp1
+ ptp4l_stop $h1
+ phc2sys_stop
+}
+
+txtime_setup()
+{
+ local if_name=$1
+
+ tc qdisc add dev $if_name clsact
+ # Classify PTP on TC 7 and isochron on TC 6
+ tc filter add dev $if_name egress protocol 0x88f7 \
+ flower action skbedit priority 7
+ tc filter add dev $if_name egress protocol 802.1Q \
+ flower vlan_ethtype 0xdead action skbedit priority 6
+ tc qdisc add dev $if_name handle 100: parent root mqprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+ map 0 1 2 3 4 5 6 7 \
+ hw 1
+ # Set up TC 5, 6, 7 for SO_TXTIME. tc-mqprio queues count from 1.
+ tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_1 + 1)) etf \
+ clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+ tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_2 + 1)) etf \
+ clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+ tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_3 + 1)) etf \
+ clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+}
+
+txtime_cleanup()
+{
+ local if_name=$1
+
+ tc qdisc del dev $if_name clsact
+ tc qdisc del dev $if_name root
+}
+
+taprio_replace()
+{
+ local if_name="$1"; shift
+ local extra_args="$1"; shift
+
+ # STREAM_PRIO_1 always has an open gate.
+ # STREAM_PRIO_2 has a gate open for GATE_DURATION_NS (half the cycle time)
+ # STREAM_PRIO_3 always has a closed gate.
+ tc qdisc replace dev $if_name root stab overhead 24 taprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+ map 0 1 2 3 4 5 6 7 \
+ sched-entry S $(printf "%x" $ALL_GATES) $GATE_DURATION_NS \
+ sched-entry S $(printf "%x" $((ALL_GATES & ~(1 << STREAM_PRIO_2)))) $GATE_DURATION_NS \
+ base-time 0 flags 0x2 $extra_args
+ taprio_wait_for_admin $if_name
+}
+
+taprio_cleanup()
+{
+ local if_name=$1
+
+ tc qdisc del dev $if_name root
+}
+
+probe_path_delay()
+{
+ local isochron_dat="$(mktemp)"
+ local received
+
+ log_info "Probing path delay"
+
+ isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" 0 \
+ "$CYCLE_TIME_NS" "" "" "$NUM_PKTS" \
+ "$STREAM_VID" "$STREAM_PRIO_1" "" "$isochron_dat"
+
+ received=$(isochron_report_num_received "$isochron_dat")
+ if [ "$received" != "$NUM_PKTS" ]; then
+ echo "Cannot establish basic data path between $h1 and $h2"
+ exit $ksft_fail
+ fi
+
+ printf "pdelay = {}\n" > isochron_data.py
+ isochron report --input-file "$isochron_dat" \
+ --printf-format "pdelay[%u] = %d - %d\n" \
+ --printf-args "qRT" \
+ >> isochron_data.py
+ cat <<-'EOF' > isochron_postprocess.py
+ #!/usr/bin/env python3
+
+ from isochron_data import pdelay
+ import numpy as np
+
+ w = np.array(list(pdelay.values()))
+ print("{}".format(np.max(w)))
+ EOF
+ path_delay=$(python3 ./isochron_postprocess.py)
+
+ log_info "Path delay from $h1 to $h2 estimated at $path_delay ns"
+
+ if [ "$path_delay" -gt "$GATE_DURATION_NS" ]; then
+ echo "Path delay larger than gate duration, aborting"
+ exit $ksft_fail
+ fi
+
+ rm -f ./isochron_data.py 2> /dev/null
+ rm -f ./isochron_postprocess.py 2> /dev/null
+ rm -f "$isochron_dat" 2> /dev/null
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+
+ txtime_setup $h1
+
+ # Temporarily set up PTP just to probe the end-to-end path delay.
+ ptp_setup
+ probe_path_delay
+ ptp_cleanup
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ isochron_recv_stop
+ txtime_cleanup $h1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+run_test()
+{
+ local base_time=$1; shift
+ local stream_prio=$1; shift
+ local expected_delay=$1; shift
+ local should_fail=$1; shift
+ local test_name=$1; shift
+ local isochron_dat="$(mktemp)"
+ local received
+ local median_delay
+
+ RET=0
+
+ # Set the shift time equal to the cycle time, which effectively
+ # cancels the default advance time. Packets won't be sent early in
+ # software, which ensures that they won't prematurely enter through
+ # the open gate in __test_out_of_band(). Also, the gate is open for
+ # long enough that this won't cause a problem in __test_in_band().
+ isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" "$base_time" \
+ "$CYCLE_TIME_NS" "$SHIFT_TIME_NS" "$GATE_DURATION_NS" \
+ "$NUM_PKTS" "$STREAM_VID" "$stream_prio" "" "$isochron_dat"
+
+ received=$(isochron_report_num_received "$isochron_dat")
+ [ "$received" = "$NUM_PKTS" ]
+ check_err_fail $should_fail $? "Reception of $NUM_PKTS packets"
+
+ if [ $should_fail = 0 ] && [ "$received" = "$NUM_PKTS" ]; then
+ printf "pdelay = {}\n" > isochron_data.py
+ isochron report --input-file "$isochron_dat" \
+ --printf-format "pdelay[%u] = %d - %d\n" \
+ --printf-args "qRT" \
+ >> isochron_data.py
+ cat <<-'EOF' > isochron_postprocess.py
+ #!/usr/bin/env python3
+
+ from isochron_data import pdelay
+ import numpy as np
+
+ w = np.array(list(pdelay.values()))
+ print("{}".format(int(np.median(w))))
+ EOF
+ median_delay=$(python3 ./isochron_postprocess.py)
+
+ # If the condition below is true, packets were delayed by a closed gate
+ [ "$median_delay" -gt $((path_delay + expected_delay)) ]
+ check_fail $? "Median delay $median_delay is greater than expected delay $expected_delay plus path delay $path_delay"
+
+ # If the condition below is true, packets were sent expecting them to
+ # hit a closed gate in the switch, but were not delayed
+ [ "$expected_delay" -gt 0 ] && [ "$median_delay" -lt "$expected_delay" ]
+ check_fail $? "Median delay $median_delay is less than expected delay $expected_delay"
+ fi
+
+ log_test "$test_name"
+
+ rm -f ./isochron_data.py 2> /dev/null
+ rm -f ./isochron_postprocess.py 2> /dev/null
+ rm -f "$isochron_dat" 2> /dev/null
+}
+
+__test_always_open()
+{
+ run_test 0.000000000 $STREAM_PRIO_1 0 0 "Gate always open"
+}
+
+__test_always_closed()
+{
+ run_test 0.000000000 $STREAM_PRIO_3 0 1 "Gate always closed"
+}
+
+__test_in_band()
+{
+ # Send packets in-band with the OPEN gate entry
+ run_test 0.000000000 $STREAM_PRIO_2 0 0 "In band with gate"
+}
+
+__test_out_of_band()
+{
+ # Send packets in-band with the CLOSE gate entry
+ run_test 0.005000000 $STREAM_PRIO_2 \
+ $((GATE_DURATION_NS - SHIFT_TIME_NS)) 0 \
+ "Out of band with gate"
+}
+
+run_subtests()
+{
+ __test_always_open
+ __test_always_closed
+ __test_in_band
+ __test_out_of_band
+}
+
+test_taprio_after_ptp()
+{
+ log_info "Setting up taprio after PTP"
+ ptp_setup
+ taprio_replace $swp2
+ run_subtests
+ taprio_cleanup $swp2
+ ptp_cleanup
+}
+
+__test_under_max_sdu()
+{
+ # Limit max-sdu for STREAM_PRIO_1
+ taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 100 0"
+ run_test 0.000000000 $STREAM_PRIO_1 0 0 "Under maximum SDU"
+}
+
+__test_over_max_sdu()
+{
+ # Limit max-sdu for STREAM_PRIO_1
+ taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 20 0"
+ run_test 0.000000000 $STREAM_PRIO_1 0 1 "Over maximum SDU"
+}
+
+test_max_sdu()
+{
+ ptp_setup
+ __test_under_max_sdu
+ __test_over_max_sdu
+ taprio_cleanup $swp2
+ ptp_cleanup
+}
+
+# Perform a clock jump in the past without synchronization running, so that the
+# time base remains where it was set by phc_ctl.
+test_clock_jump_backward()
+{
+ # This is a more complex schedule specifically crafted in a way that
+ # has been problematic on NXP LS1028A. Not much to test with it other
+ # than the fact that it passes traffic.
+ tc qdisc replace dev $swp2 root stab overhead 24 taprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \
+ base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \
+ sched-entry S 20 300000 sched-entry S 48 200000 \
+ sched-entry S 20 300000 sched-entry S 83 200000 \
+ sched-entry S 40 300000 sched-entry S 00 200000 flags 2
+
+ log_info "Forcing a backward clock jump"
+ phc_ctl $swp1 set 0
+
+ ping_test $h1 192.0.2.2
+ taprio_cleanup $swp2
+}
+
+# Test that taprio tolerates clock jumps.
+# Since ptp4l and phc2sys are running, it is expected for the time to
+# eventually recover (through yet another clock jump). Isochron waits
+# until that is the case.
+test_clock_jump_backward_forward()
+{
+ log_info "Forcing a backward and a forward clock jump"
+ taprio_replace $swp2
+ phc_ctl $swp1 set 0
+ ptp_setup
+ ping_test $h1 192.0.2.2
+ run_subtests
+ ptp_cleanup
+ taprio_cleanup $swp2
+}
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_test_skip "Could not test offloaded functionality"
+ exit $EXIT_STATUS
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh
index b91bcd8008a9..08c044ff6689 100644
--- a/tools/testing/selftests/net/forwarding/tsn_lib.sh
+++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh
@@ -2,6 +2,8 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright 2021-2022 NXP
+tc_testing_scripts_dir=$(dirname $0)/../../tc-testing/scripts
+
REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes}
REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes}
@@ -18,6 +20,7 @@ fi
if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then
require_command phc2sys
require_command ptp4l
+ require_command phc_ctl
fi
phc2sys_start()
@@ -182,6 +185,7 @@ isochron_do()
local base_time=$1; shift
local cycle_time=$1; shift
local shift_time=$1; shift
+ local window_size=$1; shift
local num_pkts=$1; shift
local vid=$1; shift
local priority=$1; shift
@@ -212,6 +216,10 @@ isochron_do()
extra_args="${extra_args} --shift-time=${shift_time}"
fi
+ if ! [ -z "${window_size}" ]; then
+ extra_args="${extra_args} --window-size=${window_size}"
+ fi
+
if [ "${use_l2}" = "true" ]; then
extra_args="${extra_args} --l2 --etype=0xdead ${vid}"
receiver_extra_args="--l2 --etype=0xdead"
@@ -247,3 +255,21 @@ isochron_do()
cpufreq_restore ${ISOCHRON_CPU}
}
+
+isochron_report_num_received()
+{
+ local isochron_dat=$1; shift
+
+ # Count all received packets by looking at the non-zero RX timestamps
+ isochron report \
+ --input-file "${isochron_dat}" \
+ --printf-format "%u\n" --printf-args "R" | \
+ grep -w -v '0' | wc -l
+}
+
+taprio_wait_for_admin()
+{
+ local if_name="$1"; shift
+
+ "$tc_testing_scripts_dir/taprio_wait_for_admin.sh" "$(which tc)" "$if_name"
+}
diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
new file mode 100755
index 000000000000..5b34f6e1f831
--- /dev/null
+++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
@@ -0,0 +1,177 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./lib.sh
+
+PAUSE_ON_FAIL="no"
+
+# The trap function handler
+#
+exit_cleanup_all()
+{
+ cleanup_all_ns
+
+ exit "${EXIT_STATUS}"
+}
+
+# Add fake IPv4 and IPv6 networks on the loopback device, to be used as
+# underlay by future GRE devices.
+#
+setup_basenet()
+{
+ ip -netns "${NS0}" link set dev lo up
+ ip -netns "${NS0}" address add dev lo 192.0.2.10/24
+ ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad
+}
+
+# Check if network device has an IPv6 link-local address assigned.
+#
+# Parameters:
+#
+# * $1: The network device to test
+# * $2: An extra regular expression that should be matched (to verify the
+# presence of extra attributes)
+# * $3: The expected return code from grep (to allow checking the absence of
+# a link-local address)
+# * $4: The user visible name for the scenario being tested
+#
+check_ipv6_ll_addr()
+{
+ local DEV="$1"
+ local EXTRA_MATCH="$2"
+ local XRET="$3"
+ local MSG="$4"
+
+ RET=0
+ set +e
+ ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}"
+ check_err_fail "${XRET}" $? ""
+ log_test "${MSG}"
+ set -e
+}
+
+# Create a GRE device and verify that it gets an IPv6 link-local address as
+# expected.
+#
+# Parameters:
+#
+# * $1: The device type (gre, ip6gre, gretap or ip6gretap)
+# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any")
+# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any")
+# * $4: The IPv6 interface identifier generation mode to use for the GRE
+# device (eui64, none, stable-privacy or random).
+#
+test_gre_device()
+{
+ local GRE_TYPE="$1"
+ local LOCAL_IP="$2"
+ local REMOTE_IP="$3"
+ local MODE="$4"
+ local ADDR_GEN_MODE
+ local MATCH_REGEXP
+ local MSG
+
+ ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}"
+
+ case "${MODE}" in
+ "eui64")
+ ADDR_GEN_MODE=0
+ MATCH_REGEXP=""
+ MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=0
+ ;;
+ "none")
+ ADDR_GEN_MODE=1
+ MATCH_REGEXP=""
+ MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=1 # No link-local address should be generated
+ ;;
+ "stable-privacy")
+ ADDR_GEN_MODE=2
+ MATCH_REGEXP="stable-privacy"
+ MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=0
+ # Initialise stable_secret (required for stable-privacy mode)
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd"
+ ;;
+ "random")
+ ADDR_GEN_MODE=3
+ MATCH_REGEXP="stable-privacy"
+ MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=0
+ ;;
+ esac
+
+ # Check that IPv6 link-local address is generated when device goes up
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
+ ip -netns "${NS0}" link set dev gretest up
+ check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}"
+
+ # Now disable link-local address generation
+ ip -netns "${NS0}" link set dev gretest down
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1
+ ip -netns "${NS0}" link set dev gretest up
+
+ # Check that link-local address generation works when re-enabled while
+ # the device is already up
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
+ check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}"
+
+ ip -netns "${NS0}" link del dev gretest
+}
+
+test_gre4()
+{
+ local GRE_TYPE
+ local MODE
+
+ for GRE_TYPE in "gre" "gretap"; do
+ printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n"
+
+ for MODE in "eui64" "none" "stable-privacy" "random"; do
+ test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}"
+ done
+ done
+}
+
+test_gre6()
+{
+ local GRE_TYPE
+ local MODE
+
+ for GRE_TYPE in "ip6gre" "ip6gretap"; do
+ printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n"
+
+ for MODE in "eui64" "none" "stable-privacy" "random"; do
+ test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}"
+ done
+ done
+}
+
+usage()
+{
+ echo "Usage: $0 [-p]"
+ exit 1
+}
+
+while getopts :p o
+do
+ case $o in
+ p) PAUSE_ON_FAIL="yes";;
+ *) usage;;
+ esac
+done
+
+setup_ns NS0
+
+set -e
+trap exit_cleanup_all EXIT
+
+setup_basenet
+
+test_gre4
+test_gre6
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 4f55477ffe08..e7a75341f0f3 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -206,9 +206,8 @@ chk_dump_one()
local token
local msg
- ss_token="$(ss -inmHMN $ns | grep 'token:' |\
- head -n 1 |\
- sed 's/.*token:\([0-9a-f]*\).*/\1/')"
+ ss_token="$(ss -inmHMN $ns |
+ mptcp_lib_get_info_value "token" "token")"
token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\
awk -F':[ \t]+' '/^token/ {print $2}')"
diff --git a/tools/testing/selftests/pcie_bwctrl/Makefile b/tools/testing/selftests/pcie_bwctrl/Makefile
index 48ec048f47af..277f92f9d753 100644
--- a/tools/testing/selftests/pcie_bwctrl/Makefile
+++ b/tools/testing/selftests/pcie_bwctrl/Makefile
@@ -1,2 +1,3 @@
-TEST_PROGS = set_pcie_cooling_state.sh set_pcie_speed.sh
+TEST_PROGS = set_pcie_cooling_state.sh
+TEST_FILES = set_pcie_speed.sh
include ../lib.mk
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json
index 1ba96c467754..d9fc62ab476c 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json
@@ -412,5 +412,27 @@
"teardown": [
"$TC qdisc del dev $DUMMY ingress"
]
+ },
+ {
+ "id": "33f4",
+ "name": "Check echo of big filter command",
+ "category": [
+ "infra",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY parent root handle 10: fq_codel"
+ ],
+ "cmdUnderTest": "bash -c '$TC -echo filter add dev $DUMMY parent 10: u32 match u32 0 0 $(for i in $(seq 32); do echo action pedit munge ip dport set 22; done) | grep \"added filter\"'",
+ "verifyCmd": "",
+ "expExitCode": "0",
+ "matchCount": "0",
+ "matchPattern": "",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY parent root fq_codel"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index d4ea9cd845a3..a951c0d33cd2 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -313,5 +313,265 @@
"$TC qdisc del dev $DUMMY handle 1: root",
"$IP addr del 10.10.10.10/24 dev $DUMMY || true"
]
+ },
+ {
+ "id": "a4c3",
+ "name": "Test HFSC with netem/blackhole - queue emptying during peek operation",
+ "category": [
+ "qdisc",
+ "hfsc",
+ "netem",
+ "blackhole"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root drr",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:2 drr",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 plug limit 1024",
+ "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 hfsc default 1",
+ "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit",
+ "$TC qdisc add dev $DUMMY parent 3:1 handle 4:0 netem delay 1ms",
+ "$TC qdisc add dev $DUMMY parent 4:1 handle 5:0 blackhole",
+ "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 2>&1 || true",
+ "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit",
+ "$TC class del dev $DUMMY parent 3:0 classid 3:1",
+ "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit",
+ "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 2>&1 || true"
+ ],
+ "cmdUnderTest": "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc hfsc 3:.*parent 1:2.*default 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "90ec",
+ "name": "Test DRR's enqueue reentrant behaviour with netem",
+ "category": [
+ "qdisc",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root drr",
+ "$TC class replace dev $DUMMY parent 1:0 classid 1:1 drr",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1"
+ ],
+ "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0",
+ "matchJSON": [
+ {
+ "kind": "drr",
+ "handle": "1:",
+ "bytes": 196,
+ "packets": 2
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "1f1f",
+ "name": "Test ETS's enqueue reentrant behaviour with netem",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root ets bands 2",
+ "$TC class replace dev $DUMMY parent 1:0 classid 1:1 ets quantum 1500",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1"
+ ],
+ "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s class show dev $DUMMY",
+ "matchJSON": [
+ {
+ "class": "ets",
+ "handle": "1:1",
+ "stats": {
+ "bytes": 196,
+ "packets": 2
+ }
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "5e6d",
+ "name": "Test QFQ's enqueue reentrant behaviour with netem",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root qfq",
+ "$TC class replace dev $DUMMY parent 1:0 classid 1:1 qfq weight 100 maxpkt 1500",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1"
+ ],
+ "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0",
+ "matchJSON": [
+ {
+ "kind": "qfq",
+ "handle": "1:",
+ "bytes": 196,
+ "packets": 2
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "bf1d",
+ "name": "Test HFSC's enqueue reentrant behaviour with netem",
+ "category": [
+ "qdisc",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root hfsc",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc ls m2 10Mbit",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit",
+ "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2",
+ "ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true",
+ "$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1",
+ "$TC class del dev $DUMMY classid 1:1"
+ ],
+ "cmdUnderTest": "ping -c 1 10.10.10.2 -I$DUMMY > /dev/null || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0",
+ "matchJSON": [
+ {
+ "kind": "hfsc",
+ "handle": "1:",
+ "bytes": 392,
+ "packets": 4
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "7c3b",
+ "name": "Test nested DRR's enqueue reentrant behaviour with netem",
+ "category": [
+ "qdisc",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root drr",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1",
+ "$TC qdisc add dev $DUMMY handle 2:0 parent 1:1 drr",
+ "$TC class add dev $DUMMY classid 2:1 parent 2:0 drr",
+ "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 2:1",
+ "$TC qdisc add dev $DUMMY parent 2:1 handle 3:0 netem duplicate 100%"
+ ],
+ "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0",
+ "matchJSON": [
+ {
+ "kind": "drr",
+ "handle": "1:",
+ "bytes": 196,
+ "packets": 2
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "62c4",
+ "name": "Test HTB with FQ_CODEL - basic functionality",
+ "category": [
+ "qdisc",
+ "htb",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin",
+ "scapyPlugin"
+ ]
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 root handle 1: htb default 11",
+ "$TC class add dev $DEV1 parent 1: classid 1:1 htb rate 10kbit",
+ "$TC class add dev $DEV1 parent 1:1 classid 1:11 htb rate 10kbit prio 0 quantum 1486",
+ "$TC qdisc add dev $DEV1 parent 1:11 fq_codel quantum 300 noecn",
+ "sleep 0.5"
+ ],
+ "scapy": {
+ "iface": "$DEV0",
+ "count": 5,
+ "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/TCP(sport=12345, dport=80)"
+ },
+ "cmdUnderTest": "$TC -s qdisc show dev $DEV1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DEV1 | grep -A 5 'qdisc fq_codel'",
+ "matchPattern": "Sent [0-9]+ bytes [0-9]+ pkt",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 handle 1: root"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json
index e9469ee71e6f..6d515d0e5ed6 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json
@@ -189,5 +189,29 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "deb1",
+ "name": "CODEL test qdisc limit trimming",
+ "category": ["qdisc", "codel"],
+ "plugins": {
+ "requires": ["nsPlugin", "scapyPlugin"]
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root codel limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root codel limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1p target 5ms interval 100ms",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
index 3a537b2ec4c9..24faf4e12dfa 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
@@ -377,5 +377,27 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "9479",
+ "name": "FQ test qdisc limit trimming",
+ "category": ["qdisc", "fq"],
+ "plugins": {"requires": ["nsPlugin", "scapyPlugin"]},
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root fq limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 1p",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json
index 9774b1e8801b..4ce62b857fd7 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json
@@ -294,5 +294,27 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "0436",
+ "name": "FQ_CODEL test qdisc limit trimming",
+ "category": ["qdisc", "fq_codel"],
+ "plugins": {"requires": ["nsPlugin", "scapyPlugin"]},
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root fq_codel limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_codel limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
index d012d88d67fe..229fe1bf4a90 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
@@ -18,5 +18,27 @@
"matchCount": "1",
"teardown": [
]
+ },
+ {
+ "id": "83bf",
+ "name": "FQ_PIE test qdisc limit trimming",
+ "category": ["qdisc", "fq_pie"],
+ "plugins": {"requires": ["nsPlugin", "scapyPlugin"]},
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root fq_pie limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_pie limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc fq_pie 1: root refcnt [0-9]+ limit 1p",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json
index dbef5474b26b..0ca19fac54a5 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json
@@ -188,5 +188,27 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "385f",
+ "name": "HHF test qdisc limit trimming",
+ "category": ["qdisc", "hhf"],
+ "plugins": {"requires": ["nsPlugin", "scapyPlugin"]},
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root hhf limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root hhf limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc hhf 1: root refcnt [0-9]+ limit 1p.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json
new file mode 100644
index 000000000000..1a98b66e8030
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json
@@ -0,0 +1,24 @@
+[
+ {
+ "id": "6158",
+ "name": "PIE test qdisc limit trimming",
+ "category": ["qdisc", "pie"],
+ "plugins": {"requires": ["nsPlugin", "scapyPlugin"]},
+ "setup": [
+ "$TC qdisc add dev $DEV1 handle 1: root pie limit 10"
+ ],
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 10,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root pie limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DEV1",
+ "matchPattern": "qdisc pie 1: root refcnt [0-9]+ limit 1p",
+ "matchCount": "1",
+ "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"]
+ }
+]
diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile
index c7781efea0f3..f34ac0bac696 100644
--- a/tools/testing/selftests/ublk/Makefile
+++ b/tools/testing/selftests/ublk/Makefile
@@ -6,6 +6,10 @@ LDLIBS += -lpthread -lm -luring
TEST_PROGS := test_generic_01.sh
TEST_PROGS += test_generic_02.sh
TEST_PROGS += test_generic_03.sh
+TEST_PROGS += test_generic_04.sh
+TEST_PROGS += test_generic_05.sh
+TEST_PROGS += test_generic_06.sh
+TEST_PROGS += test_generic_07.sh
TEST_PROGS += test_null_01.sh
TEST_PROGS += test_null_02.sh
@@ -21,12 +25,16 @@ TEST_PROGS += test_stripe_04.sh
TEST_PROGS += test_stress_01.sh
TEST_PROGS += test_stress_02.sh
+TEST_PROGS += test_stress_03.sh
+TEST_PROGS += test_stress_04.sh
+TEST_PROGS += test_stress_05.sh
TEST_GEN_PROGS_EXTENDED = kublk
include ../lib.mk
-$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c
+$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c \
+ fault_inject.c
check:
shellcheck -x -f gcc *.sh
diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c
new file mode 100644
index 000000000000..94a8e729ba4c
--- /dev/null
+++ b/tools/testing/selftests/ublk/fault_inject.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Fault injection ublk target. Hack this up however you like for
+ * testing specific behaviors of ublk_drv. Currently is a null target
+ * with a configurable delay before completing each I/O. This delay can
+ * be used to test ublk_drv's handling of I/O outstanding to the ublk
+ * server when it dies.
+ */
+
+#include "kublk.h"
+
+static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
+ struct ublk_dev *dev)
+{
+ const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+ unsigned long dev_size = 250UL << 30;
+
+ dev->tgt.dev_size = dev_size;
+ dev->tgt.params = (struct ublk_params) {
+ .types = UBLK_PARAM_TYPE_BASIC,
+ .basic = {
+ .logical_bs_shift = 9,
+ .physical_bs_shift = 12,
+ .io_opt_shift = 12,
+ .io_min_shift = 9,
+ .max_sectors = info->max_io_buf_bytes >> 9,
+ .dev_sectors = dev_size >> 9,
+ },
+ };
+
+ dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us * 1000);
+ return 0;
+}
+
+static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag)
+{
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+ struct io_uring_sqe *sqe;
+ struct __kernel_timespec ts = {
+ .tv_nsec = (long long)q->dev->private_data,
+ };
+
+ ublk_queue_alloc_sqes(q, &sqe, 1);
+ io_uring_prep_timeout(sqe, &ts, 1, 0);
+ sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, 1);
+
+ ublk_queued_tgt_io(q, tag, 1);
+
+ return 0;
+}
+
+static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, int tag,
+ const struct io_uring_cqe *cqe)
+{
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+
+ if (cqe->res != -ETIME)
+ ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res);
+
+ if (ublk_completed_tgt_io(q, tag))
+ ublk_complete_io(q, tag, iod->nr_sectors << 9);
+ else
+ ublk_err("%s: io not complete after 1 cqe\n", __func__);
+}
+
+static void ublk_fault_inject_cmd_line(struct dev_ctx *ctx, int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "delay_us", 1, NULL, 0 },
+ { 0, 0, 0, 0 }
+ };
+ int option_idx, opt;
+
+ ctx->fault_inject.delay_us = 0;
+ while ((opt = getopt_long(argc, argv, "",
+ longopts, &option_idx)) != -1) {
+ switch (opt) {
+ case 0:
+ if (!strcmp(longopts[option_idx].name, "delay_us"))
+ ctx->fault_inject.delay_us = strtoll(optarg, NULL, 10);
+ }
+ }
+}
+
+static void ublk_fault_inject_usage(const struct ublk_tgt_ops *ops)
+{
+ printf("\tfault_inject: [--delay_us us (default 0)]\n");
+}
+
+const struct ublk_tgt_ops fault_inject_tgt_ops = {
+ .name = "fault_inject",
+ .init_tgt = ublk_fault_inject_tgt_init,
+ .queue_io = ublk_fault_inject_queue_io,
+ .tgt_io_done = ublk_fault_inject_tgt_io_done,
+ .parse_cmd_line = ublk_fault_inject_cmd_line,
+ .usage = ublk_fault_inject_usage,
+};
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index 91c282bc7674..842b40736a9b 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -5,22 +5,24 @@
#include "kublk.h"
+#define MAX_NR_TGT_ARG 64
+
unsigned int ublk_dbg_mask = UBLK_LOG;
static const struct ublk_tgt_ops *tgt_ops_list[] = {
&null_tgt_ops,
&loop_tgt_ops,
&stripe_tgt_ops,
+ &fault_inject_tgt_ops,
};
static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
{
- const struct ublk_tgt_ops *ops;
int i;
if (name == NULL)
return NULL;
- for (i = 0; sizeof(tgt_ops_list) / sizeof(ops); i++)
+ for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++)
if (strcmp(tgt_ops_list[i]->name, name) == 0)
return tgt_ops_list[i];
return NULL;
@@ -118,6 +120,27 @@ static int ublk_ctrl_start_dev(struct ublk_dev *dev,
return __ublk_ctrl_cmd(dev, &data);
}
+static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_START_USER_RECOVERY,
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_END_USER_RECOVERY,
+ .flags = CTRL_CMD_HAS_DATA,
+ };
+
+ dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
+
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
static int ublk_ctrl_add_dev(struct ublk_dev *dev)
{
struct ublk_ctrl_cmd_data data = {
@@ -207,10 +230,73 @@ static const char *ublk_dev_state_desc(struct ublk_dev *dev)
};
}
+static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len)
+{
+ unsigned done = 0;
+ int i;
+
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (CPU_ISSET(i, set))
+ done += snprintf(&buf[done], len - done, "%d ", i);
+ }
+}
+
+static void ublk_adjust_affinity(cpu_set_t *set)
+{
+ int j, updated = 0;
+
+ /*
+ * Just keep the 1st CPU now.
+ *
+ * In future, auto affinity selection can be tried.
+ */
+ for (j = 0; j < CPU_SETSIZE; j++) {
+ if (CPU_ISSET(j, set)) {
+ if (!updated) {
+ updated = 1;
+ continue;
+ }
+ CPU_CLR(j, set);
+ }
+ }
+}
+
+/* Caller must free the allocated buffer */
+static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY,
+ .flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF,
+ };
+ cpu_set_t *buf;
+ int i, ret;
+
+ buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues);
+ if (!buf)
+ return -ENOMEM;
+
+ for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) {
+ data.data[0] = i;
+ data.len = sizeof(cpu_set_t);
+ data.addr = (__u64)&buf[i];
+
+ ret = __ublk_ctrl_cmd(ctrl_dev, &data);
+ if (ret < 0) {
+ free(buf);
+ return ret;
+ }
+ ublk_adjust_affinity(&buf[i]);
+ }
+
+ *ptr_buf = buf;
+ return 0;
+}
+
static void ublk_ctrl_dump(struct ublk_dev *dev)
{
struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
struct ublk_params p;
+ cpu_set_t *affinity;
int ret;
ret = ublk_ctrl_get_params(dev, &p);
@@ -219,12 +305,31 @@ static void ublk_ctrl_dump(struct ublk_dev *dev)
return;
}
+ ret = ublk_ctrl_get_affinity(dev, &affinity);
+ if (ret < 0) {
+ ublk_err("failed to get affinity %m\n");
+ return;
+ }
+
ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
info->dev_id, info->nr_hw_queues, info->queue_depth,
1 << p.basic.logical_bs_shift, p.basic.dev_sectors);
ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
info->max_io_buf_bytes, info->ublksrv_pid, info->flags,
ublk_dev_state_desc(dev));
+
+ if (affinity) {
+ char buf[512];
+ int i;
+
+ for (i = 0; i < info->nr_hw_queues; i++) {
+ ublk_print_cpu_set(&affinity[i], buf, sizeof(buf));
+ printf("\tqueue %u: tid %d affinity(%s)\n",
+ i, dev->q[i].tid, buf);
+ }
+ free(affinity);
+ }
+
fflush(stdout);
}
@@ -347,7 +452,9 @@ static int ublk_queue_init(struct ublk_queue *q)
}
ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
- IORING_SETUP_COOP_TASKRUN);
+ IORING_SETUP_COOP_TASKRUN |
+ IORING_SETUP_SINGLE_ISSUER |
+ IORING_SETUP_DEFER_TASKRUN);
if (ret < 0) {
ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
q->dev->dev_info.dev_id, q->q_id, ret);
@@ -429,12 +536,17 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
if (!(io->flags & UBLKSRV_IO_FREE))
return 0;
- /* we issue because we need either fetching or committing */
+ /*
+ * we issue because we need either fetching or committing or
+ * getting data
+ */
if (!(io->flags &
- (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
+ (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA)))
return 0;
- if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
+ if (io->flags & UBLKSRV_NEED_GET_DATA)
+ cmd_op = UBLK_U_IO_NEED_GET_DATA;
+ else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
cmd_op = UBLK_U_IO_FETCH_REQ;
@@ -551,6 +663,9 @@ static void ublk_handle_cqe(struct io_uring *r,
assert(tag < q->q_depth);
if (q->tgt_ops->queue_io)
q->tgt_ops->queue_io(q, tag);
+ } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
+ io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE;
+ ublk_queue_io_cmd(q, io, tag);
} else {
/*
* COMMIT_REQ will be completed immediately since no fetching
@@ -602,9 +717,24 @@ static int ublk_process_io(struct ublk_queue *q)
return reapped;
}
+static void ublk_queue_set_sched_affinity(const struct ublk_queue *q,
+ cpu_set_t *cpuset)
+{
+ if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0)
+ ublk_err("ublk dev %u queue %u set affinity failed",
+ q->dev->dev_info.dev_id, q->q_id);
+}
+
+struct ublk_queue_info {
+ struct ublk_queue *q;
+ sem_t *queue_sem;
+ cpu_set_t *affinity;
+};
+
static void *ublk_io_handler_fn(void *data)
{
- struct ublk_queue *q = data;
+ struct ublk_queue_info *info = data;
+ struct ublk_queue *q = info->q;
int dev_id = q->dev->dev_info.dev_id;
int ret;
@@ -614,6 +744,10 @@ static void *ublk_io_handler_fn(void *data)
dev_id, q->q_id);
return NULL;
}
+ /* IO perf is sensitive with queue pthread affinity on NUMA machine*/
+ ublk_queue_set_sched_affinity(q, info->affinity);
+ sem_post(info->queue_sem);
+
ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
q->tid, dev_id, q->q_id);
@@ -639,7 +773,7 @@ static void ublk_set_parameters(struct ublk_dev *dev)
dev->dev_info.dev_id, ret);
}
-static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id)
+static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id)
{
uint64_t id;
int evtfd = ctx->_evtfd;
@@ -652,36 +786,68 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id)
else
id = ERROR_EVTFD_DEVID;
+ if (dev && ctx->shadow_dev)
+ memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q));
+
if (write(evtfd, &id, sizeof(id)) != sizeof(id))
return -EINVAL;
+ close(evtfd);
+ shmdt(ctx->shadow_dev);
+
return 0;
}
static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
- int ret, i;
- void *thread_ret;
const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
+ struct ublk_queue_info *qinfo;
+ cpu_set_t *affinity_buf;
+ void *thread_ret;
+ sem_t queue_sem;
+ int ret, i;
ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
+ qinfo = (struct ublk_queue_info *)calloc(sizeof(struct ublk_queue_info),
+ dinfo->nr_hw_queues);
+ if (!qinfo)
+ return -ENOMEM;
+
+ sem_init(&queue_sem, 0, 0);
ret = ublk_dev_prep(ctx, dev);
if (ret)
return ret;
+ ret = ublk_ctrl_get_affinity(dev, &affinity_buf);
+ if (ret)
+ return ret;
+
for (i = 0; i < dinfo->nr_hw_queues; i++) {
dev->q[i].dev = dev;
dev->q[i].q_id = i;
+
+ qinfo[i].q = &dev->q[i];
+ qinfo[i].queue_sem = &queue_sem;
+ qinfo[i].affinity = &affinity_buf[i];
pthread_create(&dev->q[i].thread, NULL,
ublk_io_handler_fn,
- &dev->q[i]);
+ &qinfo[i]);
}
+ for (i = 0; i < dinfo->nr_hw_queues; i++)
+ sem_wait(&queue_sem);
+ free(qinfo);
+ free(affinity_buf);
+
/* everything is fine now, start us */
- ublk_set_parameters(dev);
- ret = ublk_ctrl_start_dev(dev, getpid());
+ if (ctx->recovery)
+ ret = ublk_ctrl_end_user_recovery(dev, getpid());
+ else {
+ ublk_set_parameters(dev);
+ ret = ublk_ctrl_start_dev(dev, getpid());
+ }
if (ret < 0) {
ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
goto fail;
@@ -691,7 +857,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
if (ctx->fg)
ublk_ctrl_dump(dev);
else
- ublk_send_dev_event(ctx, dev->dev_info.dev_id);
+ ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);
/* wait until we are terminated */
for (i = 0; i < dinfo->nr_hw_queues; i++)
@@ -856,7 +1022,10 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
}
}
- ret = ublk_ctrl_add_dev(dev);
+ if (ctx->recovery)
+ ret = ublk_ctrl_start_user_recovery(dev);
+ else
+ ret = ublk_ctrl_add_dev(dev);
if (ret < 0) {
ublk_err("%s: can't add dev id %d, type %s ret %d\n",
__func__, dev_id, tgt_type, ret);
@@ -870,7 +1039,7 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
fail:
if (ret < 0)
- ublk_send_dev_event(ctx, -1);
+ ublk_send_dev_event(ctx, dev, -1);
ublk_ctrl_deinit(dev);
return ret;
}
@@ -884,30 +1053,58 @@ static int cmd_dev_add(struct dev_ctx *ctx)
if (ctx->fg)
goto run;
+ ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666);
+ if (ctx->_shmid < 0) {
+ ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno));
+ exit(-1);
+ }
+ ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0);
+ if (ctx->shadow_dev == (struct ublk_dev *)-1) {
+ ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno));
+ exit(-1);
+ }
ctx->_evtfd = eventfd(0, 0);
if (ctx->_evtfd < 0) {
ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno));
exit(-1);
}
- setsid();
res = fork();
if (res == 0) {
+ int res2;
+
+ setsid();
+ res2 = fork();
+ if (res2 == 0) {
+ /* prepare for detaching */
+ close(STDIN_FILENO);
+ close(STDOUT_FILENO);
+ close(STDERR_FILENO);
run:
- res = __cmd_dev_add(ctx);
- return res;
+ res = __cmd_dev_add(ctx);
+ return res;
+ } else {
+ /* detached from the foreground task */
+ exit(EXIT_SUCCESS);
+ }
} else if (res > 0) {
uint64_t id;
+ int exit_code = EXIT_FAILURE;
res = read(ctx->_evtfd, &id, sizeof(id));
close(ctx->_evtfd);
if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) {
ctx->dev_id = id - 1;
- return __cmd_dev_list(ctx);
+ if (__cmd_dev_list(ctx) >= 0)
+ exit_code = EXIT_SUCCESS;
}
- exit(EXIT_FAILURE);
+ shmdt(ctx->shadow_dev);
+ shmctl(ctx->_shmid, IPC_RMID, NULL);
+ /* wait for child and detach from it */
+ wait(NULL);
+ exit(exit_code);
} else {
- return res;
+ exit(EXIT_FAILURE);
}
}
@@ -969,6 +1166,9 @@ static int __cmd_dev_list(struct dev_ctx *ctx)
ublk_err("%s: can't get dev info from %d: %d\n",
__func__, ctx->dev_id, ret);
} else {
+ if (ctx->shadow_dev)
+ memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q));
+
ublk_ctrl_dump(dev);
}
@@ -1039,14 +1239,47 @@ static int cmd_dev_get_features(void)
return ret;
}
+static void __cmd_create_help(char *exe, bool recovery)
+{
+ int i;
+
+ printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
+ exe, recovery ? "recover" : "add");
+ printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g]\n");
+ printf("\t[-e 0|1 ] [-i 0|1]\n");
+ printf("\t[target options] [backfile1] [backfile2] ...\n");
+ printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
+
+ for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) {
+ const struct ublk_tgt_ops *ops = tgt_ops_list[i];
+
+ if (ops->usage)
+ ops->usage(ops);
+ }
+}
+
+static void cmd_add_help(char *exe)
+{
+ __cmd_create_help(exe, false);
+ printf("\n");
+}
+
+static void cmd_recover_help(char *exe)
+{
+ __cmd_create_help(exe, true);
+ printf("\tPlease provide exact command line for creating this device with real dev_id\n");
+ printf("\n");
+}
+
static int cmd_dev_help(char *exe)
{
- printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe);
- printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n");
+ cmd_add_help(exe);
+ cmd_recover_help(exe);
+
printf("%s del [-n dev_id] -a \n", exe);
- printf("\t -a delete all devices -n delete specified device\n");
+ printf("\t -a delete all devices -n delete specified device\n\n");
printf("%s list [-n dev_id] -a \n", exe);
- printf("\t -a list all devices, -n list specified device, default -a \n");
+ printf("\t -a list all devices, -n list specified device, default -a \n\n");
printf("%s features\n", exe);
return 0;
}
@@ -1063,9 +1296,13 @@ int main(int argc, char *argv[])
{ "quiet", 0, NULL, 0 },
{ "zero_copy", 0, NULL, 'z' },
{ "foreground", 0, NULL, 0 },
- { "chunk_size", 1, NULL, 0 },
+ { "recovery", 1, NULL, 'r' },
+ { "recovery_fail_io", 1, NULL, 'e'},
+ { "recovery_reissue", 1, NULL, 'i'},
+ { "get_data", 1, NULL, 'g'},
{ 0, 0, 0, 0 }
};
+ const struct ublk_tgt_ops *ops = NULL;
int option_idx, opt;
const char *cmd = argv[1];
struct dev_ctx ctx = {
@@ -1073,15 +1310,18 @@ int main(int argc, char *argv[])
.nr_hw_queues = 2,
.dev_id = -1,
.tgt_type = "unknown",
- .chunk_size = 65536, /* def chunk size is 64K */
};
int ret = -EINVAL, i;
+ int tgt_argc = 1;
+ char *tgt_argv[MAX_NR_TGT_ARG] = { NULL };
+ int value;
if (argc == 1)
return ret;
+ opterr = 0;
optind = 2;
- while ((opt = getopt_long(argc, argv, "t:n:d:q:az",
+ while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:gaz",
longopts, &option_idx)) != -1) {
switch (opt) {
case 'a':
@@ -1103,6 +1343,24 @@ int main(int argc, char *argv[])
case 'z':
ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
break;
+ case 'r':
+ value = strtol(optarg, NULL, 10);
+ if (value)
+ ctx.flags |= UBLK_F_USER_RECOVERY;
+ break;
+ case 'e':
+ value = strtol(optarg, NULL, 10);
+ if (value)
+ ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO;
+ break;
+ case 'i':
+ value = strtol(optarg, NULL, 10);
+ if (value)
+ ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE;
+ break;
+ case 'g':
+ ctx.flags |= UBLK_F_NEED_GET_DATA;
+ break;
case 0:
if (!strcmp(longopts[option_idx].name, "debug_mask"))
ublk_dbg_mask = strtol(optarg, NULL, 16);
@@ -1110,8 +1368,26 @@ int main(int argc, char *argv[])
ublk_dbg_mask = 0;
if (!strcmp(longopts[option_idx].name, "foreground"))
ctx.fg = 1;
- if (!strcmp(longopts[option_idx].name, "chunk_size"))
- ctx.chunk_size = strtol(optarg, NULL, 10);
+ break;
+ case '?':
+ /*
+ * target requires every option must have argument
+ */
+ if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') {
+ fprintf(stderr, "every target option requires argument: %s %s\n",
+ argv[optind - 1], argv[optind]);
+ exit(EXIT_FAILURE);
+ }
+
+ if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) {
+ tgt_argv[tgt_argc++] = argv[optind - 1];
+ tgt_argv[tgt_argc++] = argv[optind];
+ } else {
+ fprintf(stderr, "too many target options\n");
+ exit(EXIT_FAILURE);
+ }
+ optind += 1;
+ break;
}
}
@@ -1120,9 +1396,25 @@ int main(int argc, char *argv[])
ctx.files[ctx.nr_files++] = argv[i++];
}
+ ops = ublk_find_tgt(ctx.tgt_type);
+ if (ops && ops->parse_cmd_line) {
+ optind = 0;
+
+ tgt_argv[0] = ctx.tgt_type;
+ ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv);
+ }
+
if (!strcmp(cmd, "add"))
ret = cmd_dev_add(&ctx);
- else if (!strcmp(cmd, "del"))
+ else if (!strcmp(cmd, "recover")) {
+ if (ctx.dev_id < 0) {
+ fprintf(stderr, "device id isn't provided for recovering\n");
+ ret = -EINVAL;
+ } else {
+ ctx.recovery = 1;
+ ret = cmd_dev_add(&ctx);
+ }
+ } else if (!strcmp(cmd, "del"))
ret = cmd_dev_del(&ctx);
else if (!strcmp(cmd, "list")) {
ctx.all = 1;
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index 760ff8ffb810..44ee1e4ac55b 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -20,9 +20,15 @@
#include <sys/wait.h>
#include <sys/eventfd.h>
#include <sys/uio.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <linux/io_uring.h>
#include <liburing.h>
-#include <linux/ublk_cmd.h>
+#include <semaphore.h>
+
+/* allow ublk_dep.h to override ublk_cmd.h */
#include "ublk_dep.h"
+#include <linux/ublk_cmd.h>
#define __maybe_unused __attribute__((unused))
#define MAX_BACK_FILES 4
@@ -30,6 +36,8 @@
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+
/****************** part 1: libublk ********************/
#define CTRL_DEV "/dev/ublk-control"
@@ -42,8 +50,8 @@
#define UBLKSRV_IO_IDLE_SECS 20
#define UBLK_IO_MAX_BYTES (1 << 20)
-#define UBLK_MAX_QUEUES 4
-#define UBLK_QUEUE_DEPTH 128
+#define UBLK_MAX_QUEUES 32
+#define UBLK_QUEUE_DEPTH 1024
#define UBLK_DBG_DEV (1U << 0)
#define UBLK_DBG_QUEUE (1U << 1)
@@ -55,6 +63,16 @@
struct ublk_dev;
struct ublk_queue;
+struct stripe_ctx {
+ /* stripe */
+ unsigned int chunk_size;
+};
+
+struct fault_inject_ctx {
+ /* fault_inject */
+ unsigned long delay_us;
+};
+
struct dev_ctx {
char tgt_type[16];
unsigned long flags;
@@ -66,11 +84,18 @@ struct dev_ctx {
unsigned int logging:1;
unsigned int all:1;
unsigned int fg:1;
-
- /* stripe */
- unsigned int chunk_size;
+ unsigned int recovery:1;
int _evtfd;
+ int _shmid;
+
+ /* built from shmem, only for ublk_dump_dev() */
+ struct ublk_dev *shadow_dev;
+
+ union {
+ struct stripe_ctx stripe;
+ struct fault_inject_ctx fault_inject;
+ };
};
struct ublk_ctrl_cmd_data {
@@ -90,6 +115,7 @@ struct ublk_io {
#define UBLKSRV_NEED_FETCH_RQ (1UL << 0)
#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1)
#define UBLKSRV_IO_FREE (1UL << 2)
+#define UBLKSRV_NEED_GET_DATA (1UL << 3)
unsigned short flags;
unsigned short refs; /* used by target code only */
@@ -107,6 +133,14 @@ struct ublk_tgt_ops {
int (*queue_io)(struct ublk_queue *, int tag);
void (*tgt_io_done)(struct ublk_queue *,
int tag, const struct io_uring_cqe *);
+
+ /*
+ * Target specific command line handling
+ *
+ * each option requires argument for target command line
+ */
+ void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
+ void (*usage)(const struct ublk_tgt_ops *ops);
};
struct ublk_tgt {
@@ -357,6 +391,7 @@ static inline int ublk_queue_use_zc(const struct ublk_queue *q)
extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
+extern const struct ublk_tgt_ops fault_inject_tgt_ops;
void backing_file_tgt_deinit(struct ublk_dev *dev);
int backing_file_tgt_init(struct ublk_dev *dev);
diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c
index 179731c3dd6f..5dbd6392d83d 100644
--- a/tools/testing/selftests/ublk/stripe.c
+++ b/tools/testing/selftests/ublk/stripe.c
@@ -281,7 +281,7 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
.max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
},
};
- unsigned chunk_size = ctx->chunk_size;
+ unsigned chunk_size = ctx->stripe.chunk_size;
struct stripe_conf *conf;
unsigned chunk_shift;
loff_t bytes = 0;
@@ -344,10 +344,36 @@ static void ublk_stripe_tgt_deinit(struct ublk_dev *dev)
backing_file_tgt_deinit(dev);
}
+static void ublk_stripe_cmd_line(struct dev_ctx *ctx, int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "chunk_size", 1, NULL, 0 },
+ { 0, 0, 0, 0 }
+ };
+ int option_idx, opt;
+
+ ctx->stripe.chunk_size = 65536;
+ while ((opt = getopt_long(argc, argv, "",
+ longopts, &option_idx)) != -1) {
+ switch (opt) {
+ case 0:
+ if (!strcmp(longopts[option_idx].name, "chunk_size"))
+ ctx->stripe.chunk_size = strtol(optarg, NULL, 10);
+ }
+ }
+}
+
+static void ublk_stripe_usage(const struct ublk_tgt_ops *ops)
+{
+ printf("\tstripe: [--chunk_size chunk_size (default 65536)]\n");
+}
+
const struct ublk_tgt_ops stripe_tgt_ops = {
.name = "stripe",
.init_tgt = ublk_stripe_tgt_init,
.deinit_tgt = ublk_stripe_tgt_deinit,
.queue_io = ublk_stripe_queue_io,
.tgt_io_done = ublk_stripe_io_done,
+ .parse_cmd_line = ublk_stripe_cmd_line,
+ .usage = ublk_stripe_usage,
};
diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh
index a88b35943227..a81210ca3e99 100755
--- a/tools/testing/selftests/ublk/test_common.sh
+++ b/tools/testing/selftests/ublk/test_common.sh
@@ -17,8 +17,8 @@ _get_disk_dev_t() {
local minor
dev=/dev/ublkb"${dev_id}"
- major=$(stat -c '%Hr' "$dev")
- minor=$(stat -c '%Lr' "$dev")
+ major="0x"$(stat -c '%t' "$dev")
+ minor="0x"$(stat -c '%T' "$dev")
echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) ))
}
@@ -30,18 +30,26 @@ _run_fio_verify_io() {
}
_create_backfile() {
- local my_size=$1
- local my_file
+ local index=$1
+ local new_size=$2
+ local old_file
+ local new_file
- my_file=$(mktemp ublk_file_"${my_size}"_XXXXX)
- truncate -s "${my_size}" "${my_file}"
- echo "$my_file"
+ old_file="${UBLK_BACKFILES[$index]}"
+ [ -f "$old_file" ] && rm -f "$old_file"
+
+ new_file=$(mktemp ublk_file_"${new_size}"_XXXXX)
+ truncate -s "${new_size}" "${new_file}"
+ UBLK_BACKFILES["$index"]="$new_file"
}
-_remove_backfile() {
- local file=$1
+_remove_files() {
+ local file
- [ -f "$file" ] && rm -f "$file"
+ for file in "${UBLK_BACKFILES[@]}"; do
+ [ -f "$file" ] && rm -f "$file"
+ done
+ [ -f "$UBLK_TMP" ] && rm -f "$UBLK_TMP"
}
_create_tmp_dir() {
@@ -106,6 +114,7 @@ _prep_test() {
local type=$1
shift 1
modprobe ublk_drv > /dev/null 2>&1
+ UBLK_TMP=$(mktemp ublk_test_XXXXX)
[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*"
}
@@ -129,7 +138,10 @@ _show_result()
echo "$1 : [FAIL]"
fi
fi
- [ "$2" -ne 0 ] && exit "$2"
+ if [ "$2" -ne 0 ]; then
+ _remove_files
+ exit "$2"
+ fi
return 0
}
@@ -138,16 +150,16 @@ _check_add_dev()
{
local tid=$1
local code=$2
- shift 2
+
if [ "${code}" -ne 0 ]; then
- _remove_test_files "$@"
_show_result "${tid}" "${code}"
fi
}
_cleanup_test() {
"${UBLK_PROG}" del -a
- rm -f "$UBLK_TMP"
+
+ _remove_files
}
_have_feature()
@@ -158,9 +170,11 @@ _have_feature()
return 1
}
-_add_ublk_dev() {
- local kublk_temp;
+_create_ublk_dev() {
local dev_id;
+ local cmd=$1
+
+ shift 1
if [ ! -c /dev/ublk-control ]; then
return ${UBLK_SKIP_CODE}
@@ -171,17 +185,34 @@ _add_ublk_dev() {
fi
fi
- kublk_temp=$(mktemp /tmp/kublk-XXXXXX)
- if ! "${UBLK_PROG}" add "$@" > "${kublk_temp}" 2>&1; then
+ if ! dev_id=$("${UBLK_PROG}" "$cmd" "$@" | grep "dev id" | awk -F '[ :]' '{print $3}'); then
echo "fail to add ublk dev $*"
- rm -f "${kublk_temp}"
return 255
fi
-
- dev_id=$(grep "dev id" "${kublk_temp}" | awk -F '[ :]' '{print $3}')
udevadm settle
- rm -f "${kublk_temp}"
- echo "${dev_id}"
+
+ if [[ "$dev_id" =~ ^[0-9]+$ ]]; then
+ echo "${dev_id}"
+ else
+ return 255
+ fi
+}
+
+_add_ublk_dev() {
+ _create_ublk_dev "add" "$@"
+}
+
+_recover_ublk_dev() {
+ local dev_id
+ local state
+
+ dev_id=$(_create_ublk_dev "recover" "$@")
+ for ((j=0;j<20;j++)); do
+ state=$(_get_ublk_dev_state "${dev_id}")
+ [ "$state" == "LIVE" ] && break
+ sleep 1
+ done
+ echo "$state"
}
# kill the ublk daemon and return ublk device state
@@ -220,7 +251,7 @@ __run_io_and_remove()
local kill_server=$3
fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
- --rw=readwrite --iodepth=64 --size="${size}" --numjobs=4 \
+ --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \
--runtime=20 --time_based > /dev/null 2>&1 &
sleep 2
if [ "${kill_server}" = "yes" ]; then
@@ -238,15 +269,80 @@ __run_io_and_remove()
wait
}
+run_io_and_remove()
+{
+ local size=$1
+ local dev_id
+ shift 1
+
+ dev_id=$(_add_ublk_dev "$@")
+ _check_add_dev "$TID" $?
+
+ [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
+ if ! __run_io_and_remove "$dev_id" "${size}" "no"; then
+ echo "/dev/ublkc$dev_id isn't removed"
+ exit 255
+ fi
+}
+
+run_io_and_kill_daemon()
+{
+ local size=$1
+ local dev_id
+ shift 1
+
+ dev_id=$(_add_ublk_dev "$@")
+ _check_add_dev "$TID" $?
+
+ [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)"
+ if ! __run_io_and_remove "$dev_id" "${size}" "yes"; then
+ echo "/dev/ublkc$dev_id isn't removed res ${res}"
+ exit 255
+ fi
+}
+
+run_io_and_recover()
+{
+ local state
+ local dev_id
+
+ dev_id=$(_add_ublk_dev "$@")
+ _check_add_dev "$TID" $?
+
+ fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
+ --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \
+ --runtime=20 --time_based > /dev/null 2>&1 &
+ sleep 4
+
+ state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED")
+ if [ "$state" != "QUIESCED" ]; then
+ echo "device isn't quiesced($state) after killing daemon"
+ return 255
+ fi
+
+ state=$(_recover_ublk_dev -n "$dev_id" "$@")
+ if [ "$state" != "LIVE" ]; then
+ echo "faile to recover to LIVE($state)"
+ return 255
+ fi
+
+ if ! __remove_ublk_dev_return "${dev_id}"; then
+ echo "delete dev ${dev_id} failed"
+ return 255
+ fi
+ wait
+}
+
+
_ublk_test_top_dir()
{
cd "$(dirname "$0")" && pwd
}
-UBLK_TMP=$(mktemp ublk_test_XXXXX)
UBLK_PROG=$(_ublk_test_top_dir)/kublk
UBLK_TEST_QUIET=1
UBLK_TEST_SHOW_RESULT=1
+UBLK_BACKFILES=()
export UBLK_PROG
export UBLK_TEST_QUIET
export UBLK_TEST_SHOW_RESULT
diff --git a/tools/testing/selftests/ublk/test_generic_04.sh b/tools/testing/selftests/ublk/test_generic_04.sh
new file mode 100755
index 000000000000..8a3bc080c577
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_04.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_04"
+ERR_CODE=0
+
+ublk_run_recover_test()
+{
+ run_io_and_recover "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "recover" "basic recover function verification"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_run_recover_test -t null -q 2 -r 1 &
+ublk_run_recover_test -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+ublk_run_recover_test -t null -q 2 -r 1 -i 1 &
+ublk_run_recover_test -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "recover"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh
new file mode 100755
index 000000000000..3bb00a347402
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_05.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_05"
+ERR_CODE=0
+
+ublk_run_recover_test()
+{
+ run_io_and_recover "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+if ! _have_feature "ZERO_COPY"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "recover" "basic recover function verification (zero copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_run_recover_test -t null -q 2 -r 1 -z &
+ublk_run_recover_test -t loop -q 2 -r 1 -z "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+ublk_run_recover_test -t null -q 2 -r 1 -z -i 1 &
+ublk_run_recover_test -t loop -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "recover"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_06.sh b/tools/testing/selftests/ublk/test_generic_06.sh
new file mode 100755
index 000000000000..b67230c42c84
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_06.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_06"
+ERR_CODE=0
+
+_prep_test "fault_inject" "fast cleanup when all I/Os of one hctx are in server"
+
+# configure ublk server to sleep 2s before completing each I/O
+dev_id=$(_add_ublk_dev -t fault_inject -q 2 -d 1 --delay_us 2000000)
+_check_add_dev $TID $?
+
+STARTTIME=${SECONDS}
+
+dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 status=none > /dev/null 2>&1 &
+dd_pid=$!
+
+__ublk_kill_daemon ${dev_id} "DEAD"
+
+wait $dd_pid
+dd_exitcode=$?
+
+ENDTIME=${SECONDS}
+ELAPSED=$(($ENDTIME - $STARTTIME))
+
+# assert that dd sees an error and exits quickly after ublk server is
+# killed. previously this relied on seeing an I/O timeout and so would
+# take ~30s
+if [ $dd_exitcode -eq 0 ]; then
+ echo "dd unexpectedly exited successfully!"
+ ERR_CODE=255
+fi
+if [ $ELAPSED -ge 5 ]; then
+ echo "dd took $ELAPSED seconds to exit (>= 5s tolerance)!"
+ ERR_CODE=255
+fi
+
+_cleanup_test "fault_inject"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_07.sh b/tools/testing/selftests/ublk/test_generic_07.sh
new file mode 100755
index 000000000000..cba86451fa5e
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_07.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_07"
+ERR_CODE=0
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "generic" "test UBLK_F_NEED_GET_DATA"
+
+_create_backfile 0 256M
+dev_id=$(_add_ublk_dev -t loop -q 2 -g "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
+
+# run fio over the ublk disk
+_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
+ERR_CODE=$?
+if [ "$ERR_CODE" -eq 0 ]; then
+ _mkfs_mount_test /dev/ublkb"${dev_id}"
+ ERR_CODE=$?
+fi
+
+_cleanup_test "generic"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_01.sh b/tools/testing/selftests/ublk/test_loop_01.sh
index 1ef8b6044777..833fa0dbc700 100755
--- a/tools/testing/selftests/ublk/test_loop_01.sh
+++ b/tools/testing/selftests/ublk/test_loop_01.sh
@@ -12,10 +12,10 @@ fi
_prep_test "loop" "write and verify test"
-backfile_0=$(_create_backfile 256M)
+_create_backfile 0 256M
-dev_id=$(_add_ublk_dev -t loop "$backfile_0")
-_check_add_dev $TID $? "${backfile_0}"
+dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
@@ -23,6 +23,4 @@ ERR_CODE=$?
_cleanup_test "loop"
-_remove_backfile "$backfile_0"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_02.sh b/tools/testing/selftests/ublk/test_loop_02.sh
index 03863d825e07..874568b3646b 100755
--- a/tools/testing/selftests/ublk/test_loop_02.sh
+++ b/tools/testing/selftests/ublk/test_loop_02.sh
@@ -8,15 +8,13 @@ ERR_CODE=0
_prep_test "loop" "mkfs & mount & umount"
-backfile_0=$(_create_backfile 256M)
-dev_id=$(_add_ublk_dev -t loop "$backfile_0")
-_check_add_dev $TID $? "$backfile_0"
+_create_backfile 0 256M
+dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
_cleanup_test "loop"
-_remove_backfile "$backfile_0"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh
index e9ca744de8b1..c30f797c6429 100755
--- a/tools/testing/selftests/ublk/test_loop_03.sh
+++ b/tools/testing/selftests/ublk/test_loop_03.sh
@@ -12,9 +12,9 @@ fi
_prep_test "loop" "write and verify over zero copy"
-backfile_0=$(_create_backfile 256M)
-dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")
-_check_add_dev $TID $? "$backfile_0"
+_create_backfile 0 256M
+dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
@@ -22,6 +22,4 @@ ERR_CODE=$?
_cleanup_test "loop"
-_remove_backfile "$backfile_0"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_04.sh b/tools/testing/selftests/ublk/test_loop_04.sh
index 1435422c38ec..b01d75b3214d 100755
--- a/tools/testing/selftests/ublk/test_loop_04.sh
+++ b/tools/testing/selftests/ublk/test_loop_04.sh
@@ -8,15 +8,14 @@ ERR_CODE=0
_prep_test "loop" "mkfs & mount & umount with zero copy"
-backfile_0=$(_create_backfile 256M)
-dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")
-_check_add_dev $TID $? "$backfile_0"
+_create_backfile 0 256M
+
+dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
_cleanup_test "loop"
-_remove_backfile "$backfile_0"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_05.sh b/tools/testing/selftests/ublk/test_loop_05.sh
index 2e6e2e6978fc..de2141533074 100755
--- a/tools/testing/selftests/ublk/test_loop_05.sh
+++ b/tools/testing/selftests/ublk/test_loop_05.sh
@@ -12,10 +12,10 @@ fi
_prep_test "loop" "write and verify test"
-backfile_0=$(_create_backfile 256M)
+_create_backfile 0 256M
-dev_id=$(_add_ublk_dev -q 2 -t loop "$backfile_0")
-_check_add_dev $TID $? "${backfile_0}"
+dev_id=$(_add_ublk_dev -q 2 -t loop "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
@@ -23,6 +23,4 @@ ERR_CODE=$?
_cleanup_test "loop"
-_remove_backfile "$backfile_0"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh
index a8be24532b24..7d3150f057d4 100755
--- a/tools/testing/selftests/ublk/test_stress_01.sh
+++ b/tools/testing/selftests/ublk/test_stress_01.sh
@@ -4,44 +4,31 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_01"
ERR_CODE=0
-DEV_ID=-1
ublk_io_and_remove()
{
- local size=$1
- shift 1
- local backfile=""
- if echo "$@" | grep -q "loop"; then
- backfile=${*: -1}
- fi
- DEV_ID=$(_add_ublk_dev "$@")
- _check_add_dev $TID $? "${backfile}"
-
- [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
- if ! __run_io_and_remove "${DEV_ID}" "${size}" "no"; then
- echo "/dev/ublkc${DEV_ID} isn't removed"
- _remove_backfile "${backfile}"
- exit 255
+ run_io_and_remove "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
fi
}
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
_prep_test "stress" "run IO and remove device"
-ublk_io_and_remove 8G -t null -q 4
-ERR_CODE=$?
-if [ ${ERR_CODE} -ne 0 ]; then
- _show_result $TID $ERR_CODE
-fi
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
-BACK_FILE=$(_create_backfile 256M)
-ublk_io_and_remove 256M -t loop -q 4 "${BACK_FILE}"
-ERR_CODE=$?
-if [ ${ERR_CODE} -ne 0 ]; then
- _show_result $TID $ERR_CODE
-fi
+ublk_io_and_remove 8G -t null -q 4 &
+ublk_io_and_remove 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" &
+ublk_io_and_remove 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
-ublk_io_and_remove 256M -t loop -q 4 -z "${BACK_FILE}"
-ERR_CODE=$?
_cleanup_test "stress"
-_remove_backfile "${BACK_FILE}"
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh
index 2159e4cc8140..1a9065125ae1 100755
--- a/tools/testing/selftests/ublk/test_stress_02.sh
+++ b/tools/testing/selftests/ublk/test_stress_02.sh
@@ -4,44 +4,31 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_02"
ERR_CODE=0
-DEV_ID=-1
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
ublk_io_and_kill_daemon()
{
- local size=$1
- shift 1
- local backfile=""
- if echo "$@" | grep -q "loop"; then
- backfile=${*: -1}
- fi
- DEV_ID=$(_add_ublk_dev "$@")
- _check_add_dev $TID $? "${backfile}"
-
- [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)"
- if ! __run_io_and_remove "${DEV_ID}" "${size}" "yes"; then
- echo "/dev/ublkc${DEV_ID} isn't removed res ${res}"
- _remove_backfile "${backfile}"
- exit 255
+ run_io_and_kill_daemon "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
fi
}
_prep_test "stress" "run IO and kill ublk server"
-ublk_io_and_kill_daemon 8G -t null -q 4
-ERR_CODE=$?
-if [ ${ERR_CODE} -ne 0 ]; then
- _show_result $TID $ERR_CODE
-fi
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
-BACK_FILE=$(_create_backfile 256M)
-ublk_io_and_kill_daemon 256M -t loop -q 4 "${BACK_FILE}"
-ERR_CODE=$?
-if [ ${ERR_CODE} -ne 0 ]; then
- _show_result $TID $ERR_CODE
-fi
+ublk_io_and_kill_daemon 8G -t null -q 4 &
+ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" &
+ublk_io_and_kill_daemon 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
-ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${BACK_FILE}"
-ERR_CODE=$?
_cleanup_test "stress"
-_remove_backfile "${BACK_FILE}"
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh
new file mode 100755
index 000000000000..e0854f71d35b
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_03.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TID="stress_03"
+ERR_CODE=0
+
+ublk_io_and_remove()
+{
+ run_io_and_remove "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+if ! _have_feature "ZERO_COPY"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "stress" "run IO and remove device(zero copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_io_and_remove 8G -t null -q 4 -z &
+ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
+ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "stress"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh
new file mode 100755
index 000000000000..1798a98387e8
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_04.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TID="stress_04"
+ERR_CODE=0
+
+ublk_io_and_kill_daemon()
+{
+ run_io_and_kill_daemon "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+if ! _have_feature "ZERO_COPY"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "stress" "run IO and kill ublk server(zero copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_io_and_kill_daemon 8G -t null -q 4 -z &
+ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
+ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "stress"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh
new file mode 100755
index 000000000000..88601b48f1cd
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_05.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TID="stress_05"
+ERR_CODE=0
+
+run_io_and_remove()
+{
+ local size=$1
+ local dev_id
+ local dev_pid
+ shift 1
+
+ dev_id=$(_add_ublk_dev "$@")
+ _check_add_dev $TID $?
+
+ [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
+
+ fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
+ --rw=readwrite --iodepth=128 --size="${size}" --numjobs=4 \
+ --runtime=40 --time_based > /dev/null 2>&1 &
+ sleep 4
+
+ dev_pid=$(_get_ublk_daemon_pid "$dev_id")
+ kill -9 "$dev_pid"
+
+ if ! __remove_ublk_dev_return "${dev_id}"; then
+ echo "delete dev ${dev_id} failed"
+ return 255
+ fi
+}
+
+ublk_io_and_remove()
+{
+ run_io_and_remove "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+_prep_test "stress" "run IO and remove device with recovery enabled"
+
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+for reissue in $(seq 0 1); do
+ ublk_io_and_remove 8G -t null -q 4 -g -r 1 -i "$reissue" &
+ ublk_io_and_remove 256M -t loop -q 4 -g -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" &
+ wait
+done
+
+if _have_feature "ZERO_COPY"; then
+ for reissue in $(seq 0 1); do
+ ublk_io_and_remove 8G -t null -q 4 -g -z -r 1 -i "$reissue" &
+ ublk_io_and_remove 256M -t loop -q 4 -g -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
+ wait
+ done
+fi
+
+_cleanup_test "stress"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh
index 7e387ef656ea..4e4f0fdf3c9b 100755
--- a/tools/testing/selftests/ublk/test_stripe_01.sh
+++ b/tools/testing/selftests/ublk/test_stripe_01.sh
@@ -12,19 +12,15 @@ fi
_prep_test "stripe" "write and verify test"
-backfile_0=$(_create_backfile 256M)
-backfile_1=$(_create_backfile 256M)
+_create_backfile 0 256M
+_create_backfile 1 256M
-dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")
-_check_add_dev $TID $? "${backfile_0}"
+dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M
ERR_CODE=$?
_cleanup_test "stripe"
-
-_remove_backfile "$backfile_0"
-_remove_backfile "$backfile_1"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_02.sh b/tools/testing/selftests/ublk/test_stripe_02.sh
index e8a45fa82dde..5820ab2efba4 100755
--- a/tools/testing/selftests/ublk/test_stripe_02.sh
+++ b/tools/testing/selftests/ublk/test_stripe_02.sh
@@ -8,17 +8,14 @@ ERR_CODE=0
_prep_test "stripe" "mkfs & mount & umount"
-backfile_0=$(_create_backfile 256M)
-backfile_1=$(_create_backfile 256M)
-dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")
-_check_add_dev $TID $? "$backfile_0" "$backfile_1"
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
_cleanup_test "stripe"
-
-_remove_backfile "$backfile_0"
-_remove_backfile "$backfile_1"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_03.sh b/tools/testing/selftests/ublk/test_stripe_03.sh
index c1b34af36145..20b977e27814 100755
--- a/tools/testing/selftests/ublk/test_stripe_03.sh
+++ b/tools/testing/selftests/ublk/test_stripe_03.sh
@@ -12,19 +12,15 @@ fi
_prep_test "stripe" "write and verify test"
-backfile_0=$(_create_backfile 256M)
-backfile_1=$(_create_backfile 256M)
+_create_backfile 0 256M
+_create_backfile 1 256M
-dev_id=$(_add_ublk_dev -q 2 -t stripe "$backfile_0" "$backfile_1")
-_check_add_dev $TID $? "${backfile_0}"
+dev_id=$(_add_ublk_dev -q 2 -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M
ERR_CODE=$?
_cleanup_test "stripe"
-
-_remove_backfile "$backfile_0"
-_remove_backfile "$backfile_1"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_04.sh b/tools/testing/selftests/ublk/test_stripe_04.sh
index 1f2b642381d1..1b51ed2f1d84 100755
--- a/tools/testing/selftests/ublk/test_stripe_04.sh
+++ b/tools/testing/selftests/ublk/test_stripe_04.sh
@@ -8,17 +8,14 @@ ERR_CODE=0
_prep_test "stripe" "mkfs & mount & umount on zero copy"
-backfile_0=$(_create_backfile 256M)
-backfile_1=$(_create_backfile 256M)
-dev_id=$(_add_ublk_dev -t stripe -z -q 2 "$backfile_0" "$backfile_1")
-_check_add_dev $TID $? "$backfile_0" "$backfile_1"
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+dev_id=$(_add_ublk_dev -t stripe -z -q 2 "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
_cleanup_test "stripe"
-
-_remove_backfile "$backfile_0"
-_remove_backfile "$backfile_1"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/x86/bugs/Makefile b/tools/testing/selftests/x86/bugs/Makefile
new file mode 100644
index 000000000000..8ff2d7226c7f
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/Makefile
@@ -0,0 +1,3 @@
+TEST_PROGS := its_sysfs.py its_permutations.py its_indirect_alignment.py its_ret_alignment.py
+TEST_FILES := common.py
+include ../../lib.mk
diff --git a/tools/testing/selftests/x86/bugs/common.py b/tools/testing/selftests/x86/bugs/common.py
new file mode 100755
index 000000000000..2f9664a80617
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/common.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# This contains kselftest framework adapted common functions for testing
+# mitigation for x86 bugs.
+
+import os, sys, re, shutil
+
+sys.path.insert(0, '../../kselftest')
+import ksft
+
+def read_file(path):
+ if not os.path.exists(path):
+ return None
+ with open(path, 'r') as file:
+ return file.read().strip()
+
+def cpuinfo_has(arg):
+ cpuinfo = read_file('/proc/cpuinfo')
+ if arg in cpuinfo:
+ return True
+ return False
+
+def cmdline_has(arg):
+ cmdline = read_file('/proc/cmdline')
+ if arg in cmdline:
+ return True
+ return False
+
+def cmdline_has_either(args):
+ cmdline = read_file('/proc/cmdline')
+ for arg in args:
+ if arg in cmdline:
+ return True
+ return False
+
+def cmdline_has_none(args):
+ return not cmdline_has_either(args)
+
+def cmdline_has_all(args):
+ cmdline = read_file('/proc/cmdline')
+ for arg in args:
+ if arg not in cmdline:
+ return False
+ return True
+
+def get_sysfs(bug):
+ return read_file("/sys/devices/system/cpu/vulnerabilities/" + bug)
+
+def sysfs_has(bug, mitigation):
+ status = get_sysfs(bug)
+ if mitigation in status:
+ return True
+ return False
+
+def sysfs_has_either(bugs, mitigations):
+ for bug in bugs:
+ for mitigation in mitigations:
+ if sysfs_has(bug, mitigation):
+ return True
+ return False
+
+def sysfs_has_none(bugs, mitigations):
+ return not sysfs_has_either(bugs, mitigations)
+
+def sysfs_has_all(bugs, mitigations):
+ for bug in bugs:
+ for mitigation in mitigations:
+ if not sysfs_has(bug, mitigation):
+ return False
+ return True
+
+def bug_check_pass(bug, found):
+ ksft.print_msg(f"\nFound: {found}")
+ # ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}")
+ ksft.test_result_pass(f'{bug}: {found}')
+
+def bug_check_fail(bug, found, expected):
+ ksft.print_msg(f'\nFound:\t {found}')
+ ksft.print_msg(f'Expected:\t {expected}')
+ ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}")
+ ksft.test_result_fail(f'{bug}: {found}')
+
+def bug_status_unknown(bug, found):
+ ksft.print_msg(f'\nUnknown status: {found}')
+ ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}")
+ ksft.test_result_fail(f'{bug}: {found}')
+
+def basic_checks_sufficient(bug, mitigation):
+ if not mitigation:
+ bug_status_unknown(bug, "None")
+ return True
+ elif mitigation == "Not affected":
+ ksft.test_result_pass(bug)
+ return True
+ elif mitigation == "Vulnerable":
+ if cmdline_has_either([f'{bug}=off', 'mitigations=off']):
+ bug_check_pass(bug, mitigation)
+ return True
+ return False
+
+def get_section_info(vmlinux, section_name):
+ from elftools.elf.elffile import ELFFile
+ with open(vmlinux, 'rb') as f:
+ elffile = ELFFile(f)
+ section = elffile.get_section_by_name(section_name)
+ if section is None:
+ ksft.print_msg("Available sections in vmlinux:")
+ for sec in elffile.iter_sections():
+ ksft.print_msg(sec.name)
+ raise ValueError(f"Section {section_name} not found in {vmlinux}")
+ return section['sh_addr'], section['sh_offset'], section['sh_size']
+
+def get_patch_sites(vmlinux, offset, size):
+ import struct
+ output = []
+ with open(vmlinux, 'rb') as f:
+ f.seek(offset)
+ i = 0
+ while i < size:
+ data = f.read(4) # s32
+ if not data:
+ break
+ sym_offset = struct.unpack('<i', data)[0] + i
+ i += 4
+ output.append(sym_offset)
+ return output
+
+def get_instruction_from_vmlinux(elffile, section, virtual_address, target_address):
+ from capstone import Cs, CS_ARCH_X86, CS_MODE_64
+ section_start = section['sh_addr']
+ section_end = section_start + section['sh_size']
+
+ if not (section_start <= target_address < section_end):
+ return None
+
+ offset = target_address - section_start
+ code = section.data()[offset:offset + 16]
+
+ cap = init_capstone()
+ for instruction in cap.disasm(code, target_address):
+ if instruction.address == target_address:
+ return instruction
+ return None
+
+def init_capstone():
+ from capstone import Cs, CS_ARCH_X86, CS_MODE_64, CS_OPT_SYNTAX_ATT
+ cap = Cs(CS_ARCH_X86, CS_MODE_64)
+ cap.syntax = CS_OPT_SYNTAX_ATT
+ return cap
+
+def get_runtime_kernel():
+ import drgn
+ return drgn.program_from_kernel()
+
+def check_dependencies_or_skip(modules, script_name="unknown test"):
+ for mod in modules:
+ try:
+ __import__(mod)
+ except ImportError:
+ ksft.test_result_skip(f"Skipping {script_name}: missing module '{mod}'")
+ ksft.finished()
diff --git a/tools/testing/selftests/x86/bugs/its_indirect_alignment.py b/tools/testing/selftests/x86/bugs/its_indirect_alignment.py
new file mode 100755
index 000000000000..cdc33ae6a91c
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/its_indirect_alignment.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# Test for indirect target selection (ITS) mitigation.
+#
+# Test if indirect CALL/JMP are correctly patched by evaluating
+# the vmlinux .retpoline_sites in /proc/kcore.
+
+# Install dependencies
+# add-apt-repository ppa:michel-slm/kernel-utils
+# apt update
+# apt install -y python3-drgn python3-pyelftools python3-capstone
+#
+# Best to copy the vmlinux at a standard location:
+# mkdir -p /usr/lib/debug/lib/modules/$(uname -r)
+# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux
+#
+# Usage: ./its_indirect_alignment.py [vmlinux]
+
+import os, sys, argparse
+from pathlib import Path
+
+this_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, this_dir + '/../../kselftest')
+import ksft
+import common as c
+
+bug = "indirect_target_selection"
+
+mitigation = c.get_sysfs(bug)
+if not mitigation or "Aligned branch/return thunks" not in mitigation:
+ ksft.test_result_skip("Skipping its_indirect_alignment.py: Aligned branch/return thunks not enabled")
+ ksft.finished()
+
+if c.sysfs_has("spectre_v2", "Retpolines"):
+ ksft.test_result_skip("Skipping its_indirect_alignment.py: Retpolines deployed")
+ ksft.finished()
+
+c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_indirect_alignment.py")
+
+from elftools.elf.elffile import ELFFile
+from drgn.helpers.common.memory import identify_address
+
+cap = c.init_capstone()
+
+if len(os.sys.argv) > 1:
+ arg_vmlinux = os.sys.argv[1]
+ if not os.path.exists(arg_vmlinux):
+ ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at argument path: {arg_vmlinux}")
+ ksft.exit_fail()
+ os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True)
+ os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux')
+
+vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux"
+if not os.path.exists(vmlinux):
+ ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at {vmlinux}")
+ ksft.exit_fail()
+
+ksft.print_msg(f"Using vmlinux: {vmlinux}")
+
+retpolines_start_vmlinux, retpolines_sec_offset, size = c.get_section_info(vmlinux, '.retpoline_sites')
+ksft.print_msg(f"vmlinux: Section .retpoline_sites (0x{retpolines_start_vmlinux:x}) found at 0x{retpolines_sec_offset:x} with size 0x{size:x}")
+
+sites_offset = c.get_patch_sites(vmlinux, retpolines_sec_offset, size)
+total_retpoline_tests = len(sites_offset)
+ksft.print_msg(f"Found {total_retpoline_tests} retpoline sites")
+
+prog = c.get_runtime_kernel()
+retpolines_start_kcore = prog.symbol('__retpoline_sites').address
+ksft.print_msg(f'kcore: __retpoline_sites: 0x{retpolines_start_kcore:x}')
+
+x86_indirect_its_thunk_r15 = prog.symbol('__x86_indirect_its_thunk_r15').address
+ksft.print_msg(f'kcore: __x86_indirect_its_thunk_r15: 0x{x86_indirect_its_thunk_r15:x}')
+
+tests_passed = 0
+tests_failed = 0
+tests_unknown = 0
+
+with open(vmlinux, 'rb') as f:
+ elffile = ELFFile(f)
+ text_section = elffile.get_section_by_name('.text')
+
+ for i in range(0, len(sites_offset)):
+ site = retpolines_start_kcore + sites_offset[i]
+ vmlinux_site = retpolines_start_vmlinux + sites_offset[i]
+ passed = unknown = failed = False
+ try:
+ vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site)
+ kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0]
+ operand = kcore_insn.op_str
+ insn_end = site + kcore_insn.size - 1 # TODO handle Jcc.32 __x86_indirect_thunk_\reg
+ safe_site = insn_end & 0x20
+ site_status = "" if safe_site else "(unsafe)"
+
+ ksft.print_msg(f"\nSite {i}: {identify_address(prog, site)} <0x{site:x}> {site_status}")
+ ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}")
+ ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}")
+
+ if (site & 0x20) ^ (insn_end & 0x20):
+ ksft.print_msg(f"\tSite at safe/unsafe boundary: {str(kcore_insn.bytes)} {kcore_insn.mnemonic} {operand}")
+ if safe_site:
+ tests_passed += 1
+ passed = True
+ ksft.print_msg(f"\tPASSED: At safe address")
+ continue
+
+ if operand.startswith('0xffffffff'):
+ thunk = int(operand, 16)
+ if thunk > x86_indirect_its_thunk_r15:
+ insn_at_thunk = list(cap.disasm(prog.read(thunk, 16), thunk))[0]
+ operand += ' -> ' + insn_at_thunk.mnemonic + ' ' + insn_at_thunk.op_str + ' <dynamic-thunk?>'
+ if 'jmp' in insn_at_thunk.mnemonic and thunk & 0x20:
+ ksft.print_msg(f"\tPASSED: Found {operand} at safe address")
+ passed = True
+ if not passed:
+ if kcore_insn.operands[0].type == capstone.CS_OP_IMM:
+ operand += ' <' + prog.symbol(int(operand, 16)) + '>'
+ if '__x86_indirect_its_thunk_' in operand:
+ ksft.print_msg(f"\tPASSED: Found {operand}")
+ else:
+ ksft.print_msg(f"\tPASSED: Found direct branch: {kcore_insn}, ITS thunk not required.")
+ passed = True
+ else:
+ unknown = True
+ if passed:
+ tests_passed += 1
+ elif unknown:
+ ksft.print_msg(f"UNKNOWN: unexpected operand: {kcore_insn}")
+ tests_unknown += 1
+ else:
+ ksft.print_msg(f'\t************* FAILED *************')
+ ksft.print_msg(f"\tFound {kcore_insn.bytes} {kcore_insn.mnemonic} {operand}")
+ ksft.print_msg(f'\t**********************************')
+ tests_failed += 1
+ except Exception as e:
+ ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}")
+ tests_unknown += 1
+
+ksft.print_msg(f"\n\nSummary:")
+ksft.print_msg(f"PASS: \t{tests_passed} \t/ {total_retpoline_tests}")
+ksft.print_msg(f"FAIL: \t{tests_failed} \t/ {total_retpoline_tests}")
+ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_retpoline_tests}")
+
+if tests_failed == 0:
+ ksft.test_result_pass("All ITS return thunk sites passed")
+else:
+ ksft.test_result_fail(f"{tests_failed} ITS return thunk sites failed")
+ksft.finished()
diff --git a/tools/testing/selftests/x86/bugs/its_permutations.py b/tools/testing/selftests/x86/bugs/its_permutations.py
new file mode 100755
index 000000000000..3204f4728c62
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/its_permutations.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# Test for indirect target selection (ITS) cmdline permutations with other bugs
+# like spectre_v2 and retbleed.
+
+import os, sys, subprocess, itertools, re, shutil
+
+test_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, test_dir + '/../../kselftest')
+import ksft
+import common as c
+
+bug = "indirect_target_selection"
+mitigation = c.get_sysfs(bug)
+
+if not mitigation or "Not affected" in mitigation:
+ ksft.test_result_skip("Skipping its_permutations.py: not applicable")
+ ksft.finished()
+
+if shutil.which('vng') is None:
+ ksft.test_result_skip("Skipping its_permutations.py: virtme-ng ('vng') not found in PATH.")
+ ksft.finished()
+
+TEST = f"{test_dir}/its_sysfs.py"
+default_kparam = ['clearcpuid=hypervisor', 'panic=5', 'panic_on_warn=1', 'oops=panic', 'nmi_watchdog=1', 'hung_task_panic=1']
+
+DEBUG = " -v "
+
+# Install dependencies
+# https://github.com/arighi/virtme-ng
+# apt install virtme-ng
+BOOT_CMD = f"vng --run {test_dir}/../../../../../arch/x86/boot/bzImage "
+#BOOT_CMD += DEBUG
+
+bug = "indirect_target_selection"
+
+input_options = {
+ 'indirect_target_selection' : ['off', 'on', 'stuff', 'vmexit'],
+ 'retbleed' : ['off', 'stuff', 'auto'],
+ 'spectre_v2' : ['off', 'on', 'eibrs', 'retpoline', 'ibrs', 'eibrs,retpoline'],
+}
+
+def pretty_print(output):
+ OKBLUE = '\033[94m'
+ OKGREEN = '\033[92m'
+ WARNING = '\033[93m'
+ FAIL = '\033[91m'
+ ENDC = '\033[0m'
+ BOLD = '\033[1m'
+
+ # Define patterns and their corresponding colors
+ patterns = {
+ r"^ok \d+": OKGREEN,
+ r"^not ok \d+": FAIL,
+ r"^# Testing .*": OKBLUE,
+ r"^# Found: .*": WARNING,
+ r"^# Totals: .*": BOLD,
+ r"pass:([1-9]\d*)": OKGREEN,
+ r"fail:([1-9]\d*)": FAIL,
+ r"skip:([1-9]\d*)": WARNING,
+ }
+
+ # Apply colors based on patterns
+ for pattern, color in patterns.items():
+ output = re.sub(pattern, lambda match: f"{color}{match.group(0)}{ENDC}", output, flags=re.MULTILINE)
+
+ print(output)
+
+combinations = list(itertools.product(*input_options.values()))
+ksft.print_header()
+ksft.set_plan(len(combinations))
+
+logs = ""
+
+for combination in combinations:
+ append = ""
+ log = ""
+ for p in default_kparam:
+ append += f' --append={p}'
+ command = BOOT_CMD + append
+ test_params = ""
+ for i, key in enumerate(input_options.keys()):
+ param = f'{key}={combination[i]}'
+ test_params += f' {param}'
+ command += f" --append={param}"
+ command += f" -- {TEST}"
+ test_name = f"{bug} {test_params}"
+ pretty_print(f'# Testing {test_name}')
+ t = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ t.wait()
+ output, _ = t.communicate()
+ if t.returncode == 0:
+ ksft.test_result_pass(test_name)
+ else:
+ ksft.test_result_fail(test_name)
+ output = output.decode()
+ log += f" {output}"
+ pretty_print(log)
+ logs += output + "\n"
+
+# Optionally use tappy to parse the output
+# apt install python3-tappy
+with open("logs.txt", "w") as f:
+ f.write(logs)
+
+ksft.finished()
diff --git a/tools/testing/selftests/x86/bugs/its_ret_alignment.py b/tools/testing/selftests/x86/bugs/its_ret_alignment.py
new file mode 100755
index 000000000000..f40078d9f6ff
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/its_ret_alignment.py
@@ -0,0 +1,139 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# Test for indirect target selection (ITS) mitigation.
+#
+# Tests if the RETs are correctly patched by evaluating the
+# vmlinux .return_sites in /proc/kcore.
+#
+# Install dependencies
+# add-apt-repository ppa:michel-slm/kernel-utils
+# apt update
+# apt install -y python3-drgn python3-pyelftools python3-capstone
+#
+# Run on target machine
+# mkdir -p /usr/lib/debug/lib/modules/$(uname -r)
+# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux
+#
+# Usage: ./its_ret_alignment.py
+
+import os, sys, argparse
+from pathlib import Path
+
+this_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, this_dir + '/../../kselftest')
+import ksft
+import common as c
+
+bug = "indirect_target_selection"
+mitigation = c.get_sysfs(bug)
+if not mitigation or "Aligned branch/return thunks" not in mitigation:
+ ksft.test_result_skip("Skipping its_ret_alignment.py: Aligned branch/return thunks not enabled")
+ ksft.finished()
+
+c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_ret_alignment.py")
+
+from elftools.elf.elffile import ELFFile
+from drgn.helpers.common.memory import identify_address
+
+cap = c.init_capstone()
+
+if len(os.sys.argv) > 1:
+ arg_vmlinux = os.sys.argv[1]
+ if not os.path.exists(arg_vmlinux):
+ ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at user-supplied path: {arg_vmlinux}")
+ ksft.exit_fail()
+ os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True)
+ os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux')
+
+vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux"
+if not os.path.exists(vmlinux):
+ ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at {vmlinux}")
+ ksft.exit_fail()
+
+ksft.print_msg(f"Using vmlinux: {vmlinux}")
+
+rethunks_start_vmlinux, rethunks_sec_offset, size = c.get_section_info(vmlinux, '.return_sites')
+ksft.print_msg(f"vmlinux: Section .return_sites (0x{rethunks_start_vmlinux:x}) found at 0x{rethunks_sec_offset:x} with size 0x{size:x}")
+
+sites_offset = c.get_patch_sites(vmlinux, rethunks_sec_offset, size)
+total_rethunk_tests = len(sites_offset)
+ksft.print_msg(f"Found {total_rethunk_tests} rethunk sites")
+
+prog = c.get_runtime_kernel()
+rethunks_start_kcore = prog.symbol('__return_sites').address
+ksft.print_msg(f'kcore: __rethunk_sites: 0x{rethunks_start_kcore:x}')
+
+its_return_thunk = prog.symbol('its_return_thunk').address
+ksft.print_msg(f'kcore: its_return_thunk: 0x{its_return_thunk:x}')
+
+tests_passed = 0
+tests_failed = 0
+tests_unknown = 0
+tests_skipped = 0
+
+with open(vmlinux, 'rb') as f:
+ elffile = ELFFile(f)
+ text_section = elffile.get_section_by_name('.text')
+
+ for i in range(len(sites_offset)):
+ site = rethunks_start_kcore + sites_offset[i]
+ vmlinux_site = rethunks_start_vmlinux + sites_offset[i]
+ try:
+ passed = unknown = failed = skipped = False
+
+ symbol = identify_address(prog, site)
+ vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site)
+ kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0]
+
+ insn_end = site + kcore_insn.size - 1
+
+ safe_site = insn_end & 0x20
+ site_status = "" if safe_site else "(unsafe)"
+
+ ksft.print_msg(f"\nSite {i}: {symbol} <0x{site:x}> {site_status}")
+ ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}")
+ ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}")
+
+ if safe_site:
+ tests_passed += 1
+ passed = True
+ ksft.print_msg(f"\tPASSED: At safe address")
+ continue
+
+ if "jmp" in kcore_insn.mnemonic:
+ passed = True
+ elif "ret" not in kcore_insn.mnemonic:
+ skipped = True
+
+ if passed:
+ ksft.print_msg(f"\tPASSED: Found {kcore_insn.mnemonic} {kcore_insn.op_str}")
+ tests_passed += 1
+ elif skipped:
+ ksft.print_msg(f"\tSKIPPED: Found '{kcore_insn.mnemonic}'")
+ tests_skipped += 1
+ elif unknown:
+ ksft.print_msg(f"UNKNOWN: An unknown instruction: {kcore_insn}")
+ tests_unknown += 1
+ else:
+ ksft.print_msg(f'\t************* FAILED *************')
+ ksft.print_msg(f"\tFound {kcore_insn.mnemonic} {kcore_insn.op_str}")
+ ksft.print_msg(f'\t**********************************')
+ tests_failed += 1
+ except Exception as e:
+ ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}")
+ tests_unknown += 1
+
+ksft.print_msg(f"\n\nSummary:")
+ksft.print_msg(f"PASSED: \t{tests_passed} \t/ {total_rethunk_tests}")
+ksft.print_msg(f"FAILED: \t{tests_failed} \t/ {total_rethunk_tests}")
+ksft.print_msg(f"SKIPPED: \t{tests_skipped} \t/ {total_rethunk_tests}")
+ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_rethunk_tests}")
+
+if tests_failed == 0:
+ ksft.test_result_pass("All ITS return thunk sites passed.")
+else:
+ ksft.test_result_fail(f"{tests_failed} failed sites need ITS return thunks.")
+ksft.finished()
diff --git a/tools/testing/selftests/x86/bugs/its_sysfs.py b/tools/testing/selftests/x86/bugs/its_sysfs.py
new file mode 100755
index 000000000000..7bca81f2f606
--- /dev/null
+++ b/tools/testing/selftests/x86/bugs/its_sysfs.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025 Intel Corporation
+#
+# Test for Indirect Target Selection(ITS) mitigation sysfs status.
+
+import sys, os, re
+this_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, this_dir + '/../../kselftest')
+import ksft
+
+from common import *
+
+bug = "indirect_target_selection"
+mitigation = get_sysfs(bug)
+
+ITS_MITIGATION_ALIGNED_THUNKS = "Mitigation: Aligned branch/return thunks"
+ITS_MITIGATION_RETPOLINE_STUFF = "Mitigation: Retpolines, Stuffing RSB"
+ITS_MITIGATION_VMEXIT_ONLY = "Mitigation: Vulnerable, KVM: Not affected"
+ITS_MITIGATION_VULNERABLE = "Vulnerable"
+
+def check_mitigation():
+ if mitigation == ITS_MITIGATION_ALIGNED_THUNKS:
+ if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"):
+ bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_RETPOLINE_STUFF)
+ return
+ if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'):
+ bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_VMEXIT_ONLY)
+ return
+ bug_check_pass(bug, ITS_MITIGATION_ALIGNED_THUNKS)
+ return
+
+ if mitigation == ITS_MITIGATION_RETPOLINE_STUFF:
+ if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"):
+ bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF)
+ return
+ if sysfs_has('retbleed', 'Stuffing'):
+ bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF)
+ return
+ bug_check_fail(bug, ITS_MITIGATION_RETPOLINE_STUFF, ITS_MITIGATION_ALIGNED_THUNKS)
+
+ if mitigation == ITS_MITIGATION_VMEXIT_ONLY:
+ if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'):
+ bug_check_pass(bug, ITS_MITIGATION_VMEXIT_ONLY)
+ return
+ bug_check_fail(bug, ITS_MITIGATION_VMEXIT_ONLY, ITS_MITIGATION_ALIGNED_THUNKS)
+
+ if mitigation == ITS_MITIGATION_VULNERABLE:
+ if sysfs_has("spectre_v2", "Vulnerable"):
+ bug_check_pass(bug, ITS_MITIGATION_VULNERABLE)
+ else:
+ bug_check_fail(bug, "Mitigation", ITS_MITIGATION_VULNERABLE)
+
+ bug_status_unknown(bug, mitigation)
+ return
+
+ksft.print_header()
+ksft.set_plan(1)
+ksft.print_msg(f'{bug}: {mitigation} ...')
+
+if not basic_checks_sufficient(bug, mitigation):
+ check_mitigation()
+
+ksft.finished()
diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c
index 66dbb362385f..0f97fb0d19e1 100644
--- a/tools/testing/shared/linux.c
+++ b/tools/testing/shared/linux.c
@@ -150,7 +150,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list)
{
if (kmalloc_verbose)
- pr_debug("Bulk free %p[0-%lu]\n", list, size - 1);
+ pr_debug("Bulk free %p[0-%zu]\n", list, size - 1);
pthread_mutex_lock(&cachep->lock);
for (int i = 0; i < size; i++)
@@ -168,7 +168,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
size_t i;
if (kmalloc_verbose)
- pr_debug("Bulk alloc %lu\n", size);
+ pr_debug("Bulk alloc %zu\n", size);
pthread_mutex_lock(&cachep->lock);
if (cachep->nr_objs >= size) {
diff --git a/tools/testing/shared/linux/cleanup.h b/tools/testing/shared/linux/cleanup.h
new file mode 100644
index 000000000000..ea3081426ee9
--- /dev/null
+++ b/tools/testing/shared/linux/cleanup.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "../../../../include/linux/cleanup.h"
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index d0f6d253ac72..613551132a96 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -1264,21 +1264,25 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type)
send_buf(fd, buf, sizeof(buf), 0, sizeof(buf));
control_expectln("RECEIVED");
- ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
- if (ret < 0) {
- if (errno == EOPNOTSUPP) {
- fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
- } else {
+ /* SIOCOUTQ isn't guaranteed to instantly track sent data. Even though
+ * the "RECEIVED" message means that the other side has received the
+ * data, there can be a delay in our kernel before updating the "unsent
+ * bytes" counter. Repeat SIOCOUTQ until it returns 0.
+ */
+ timeout_begin(TIMEOUT);
+ do {
+ ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
+ if (ret < 0) {
+ if (errno == EOPNOTSUPP) {
+ fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
+ break;
+ }
perror("ioctl");
exit(EXIT_FAILURE);
}
- } else if (ret == 0 && sock_bytes_unsent != 0) {
- fprintf(stderr,
- "Unexpected 'SIOCOUTQ' value, expected 0, got %i\n",
- sock_bytes_unsent);
- exit(EXIT_FAILURE);
- }
-
+ timeout_check("SIOCOUTQ");
+ } while (sock_bytes_unsent != 0);
+ timeout_end();
close(fd);
}