aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h5
-rw-r--r--tools/arch/arm64/include/uapi/asm/unistd.h24
-rw-r--r--tools/arch/x86/include/asm/amd/ibs.h (renamed from tools/arch/x86/include/asm/amd-ibs.h)2
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h48
-rw-r--r--tools/arch/x86/include/asm/inat.h6
-rw-r--r--tools/arch/x86/include/asm/msr-index.h31
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h4
-rw-r--r--tools/arch/x86/include/uapi/asm/svm.h2
-rw-r--r--tools/arch/x86/kcpuid/cpuid.csv791
-rw-r--r--tools/arch/x86/kcpuid/kcpuid.c375
-rw-r--r--tools/arch/x86/lib/insn.c7
-rw-r--r--tools/arch/x86/lib/memset_64.S3
-rw-r--r--tools/arch/x86/lib/x86-opcode-map.txt60
-rw-r--r--tools/arch/x86/tools/gen-insn-attr-x86.awk7
-rw-r--r--tools/counter/.gitignore1
-rw-r--r--tools/counter/counter_watch_events.c5
-rw-r--r--tools/hv/hv_kvp_daemon.c108
-rw-r--r--tools/iio/iio_event_monitor.c4
-rw-r--r--tools/include/asm/timex.h13
-rw-r--r--tools/include/linux/bitmap.h21
-rw-r--r--tools/include/linux/cfi_types.h45
-rw-r--r--tools/include/linux/container_of.h18
-rw-r--r--tools/include/linux/kernel.h14
-rw-r--r--tools/include/linux/math64.h5
-rw-r--r--tools/include/linux/moduleparam.h7
-rw-r--r--tools/include/linux/prandom.h51
-rw-r--r--tools/include/linux/refcount.h5
-rw-r--r--tools/include/linux/slab.h1
-rw-r--r--tools/include/linux/types.h2
-rw-r--r--tools/include/uapi/asm-generic/mman-common.h1
-rw-r--r--tools/include/uapi/asm-generic/unistd.h4
-rw-r--r--tools/include/uapi/linux/bpf.h3
-rw-r--r--tools/include/uapi/linux/in.h2
-rw-r--r--tools/include/uapi/linux/kvm.h9
-rw-r--r--tools/include/uapi/linux/perf_event.h2
-rw-r--r--tools/include/uapi/linux/stat.h99
-rw-r--r--tools/include/uapi/linux/types.h3
-rw-r--r--tools/lib/bitmap.c20
-rw-r--r--tools/lib/perf/Makefile14
-rw-r--r--tools/lib/slab.c16
-rw-r--r--tools/net/ynl/lib/ynl.c2
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py96
-rw-r--r--tools/objtool/Documentation/objtool.txt10
-rw-r--r--tools/objtool/arch/loongarch/decode.c14
-rw-r--r--tools/objtool/arch/loongarch/orc.c8
-rw-r--r--tools/objtool/arch/x86/decode.c39
-rw-r--r--tools/objtool/arch/x86/orc.c6
-rw-r--r--tools/objtool/arch/x86/special.c38
-rw-r--r--tools/objtool/builtin-check.c132
-rw-r--r--tools/objtool/check.c706
-rw-r--r--tools/objtool/elf.c156
-rw-r--r--tools/objtool/include/objtool/arch.h3
-rw-r--r--tools/objtool/include/objtool/builtin.h6
-rw-r--r--tools/objtool/include/objtool/check.h3
-rw-r--r--tools/objtool/include/objtool/elf.h30
-rw-r--r--tools/objtool/include/objtool/objtool.h2
-rw-r--r--tools/objtool/include/objtool/special.h4
-rw-r--r--tools/objtool/include/objtool/warn.h62
-rw-r--r--tools/objtool/objtool.c15
-rw-r--r--tools/objtool/orc_dump.c30
-rw-r--r--tools/objtool/special.c25
-rw-r--r--tools/perf/Makefile.config1
-rw-r--r--tools/perf/arch/arm/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl1
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/sh/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/sparc/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_32.tbl3
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--tools/perf/arch/xtensa/entry/syscalls/syscall.tbl1
-rwxr-xr-xtools/perf/check-headers.sh3
-rwxr-xr-xtools/perf/tests/shell/trace_btf_enum.sh2
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h2
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fcntl.h4
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fs.h21
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/mount.h10
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/prctl.h11
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/stat.h99
-rw-r--r--tools/perf/trace/beauty/include/uapi/sound/asound.h8
-rw-r--r--tools/perf/util/amd-sample-raw.c2
-rw-r--r--tools/perf/util/evsel.c22
-rw-r--r--tools/perf/util/unwind-libunwind-local.c2
-rw-r--r--tools/power/x86/turbostat/turbostat.816
-rw-r--r--tools/power/x86/turbostat/turbostat.c162
-rw-r--r--tools/sched_ext/include/scx/common.bpf.h85
-rw-r--r--tools/sched_ext/include/scx/enum_defs.autogen.h3
-rw-r--r--tools/sched_ext/include/scx/enums.autogen.bpf.h24
-rw-r--r--tools/sched_ext/include/scx/enums.autogen.h8
-rw-r--r--tools/sched_ext/include/scx/enums.h3
-rw-r--r--tools/sched_ext/scx_flatcg.bpf.c2
-rw-r--r--tools/scripts/syscall.tbl1
-rw-r--r--tools/testing/cxl/Kbuild3
-rw-r--r--tools/testing/cxl/test/cxl.c32
-rw-r--r--tools/testing/cxl/test/mem.c34
-rw-r--r--tools/testing/kunit/configs/all_tests.config4
-rw-r--r--tools/testing/kunit/kunit_parser.py4
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py4
-rw-r--r--tools/testing/kunit/qemu_configs/sh.py4
-rw-r--r--tools/testing/memblock/internal.h6
-rw-r--r--tools/testing/memblock/linux/mutex.h14
-rw-r--r--tools/testing/memblock/tests/basic_api.c102
-rw-r--r--tools/testing/radix-tree/Makefile1
-rw-r--r--tools/testing/rbtree/Makefile33
-rw-r--r--tools/testing/rbtree/interval_tree_test.c58
-rw-r--r--tools/testing/rbtree/rbtree_test.c48
-rw-r--r--tools/testing/rbtree/test.h4
-rw-r--r--tools/testing/selftests/.gitignore1
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/bpf/config.x86_641
-rw-r--r--tools/testing/selftests/bpf/prog_tests/for_each.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/res_spin_lock.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h2
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_hash_modify.c30
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h2
-rw-r--r--tools/testing/selftests/bpf/progs/res_spin_lock.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_module_attach.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_subprogs_extable.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_private_stack.c6
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh617
-rw-r--r--tools/testing/selftests/clone3/clone3_selftests.h2
l---------tools/testing/selftests/drivers/net/dsa/tc_taprio.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/hds.py35
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/csum.py2
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/iou-zcrx.py12
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/irq.py2
-rw-r--r--tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c13
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py21
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/psfp.sh8
-rwxr-xr-xtools/testing/selftests/drivers/net/ping.py60
-rwxr-xr-xtools/testing/selftests/drivers/net/queues.py4
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c57
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc23
-rw-r--r--tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc20
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc177
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_wouldblock.c2
-rw-r--r--tools/testing/selftests/hid/config.common1
-rw-r--r--tools/testing/selftests/iommu/iommufd.c365
-rw-r--r--tools/testing/selftests/iommu/iommufd_fail_nth.c59
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h229
-rw-r--r--tools/testing/selftests/kexec/Makefile7
-rw-r--r--tools/testing/selftests/kexec/test_kexec_jump.c72
-rwxr-xr-xtools/testing/selftests/kexec/test_kexec_jump.sh42
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm45
-rw-r--r--tools/testing/selftests/kvm/arm64/page_fault_test.c2
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c8
-rw-r--r--tools/testing/selftests/kvm/include/arm64/processor.h67
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/processor.c60
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c5
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c8
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c31
-rw-r--r--tools/testing/selftests/kvm/x86/monitor_mwait_test.c108
-rw-r--r--tools/testing/selftests/landlock/audit.h21
-rw-r--r--tools/testing/selftests/landlock/audit_test.c154
-rw-r--r--tools/testing/selftests/landlock/fs_test.c3
-rw-r--r--tools/testing/selftests/lib/config1
-rw-r--r--tools/testing/selftests/mincore/mincore_selftest.c19
-rw-r--r--tools/testing/selftests/mm/.gitignore2
-rw-r--r--tools/testing/selftests/mm/Makefile2
-rwxr-xr-xtools/testing/selftests/mm/charge_reserved_hugetlb.sh4
-rw-r--r--tools/testing/selftests/mm/compaction_test.c19
-rw-r--r--tools/testing/selftests/mm/cow.c4
-rw-r--r--tools/testing/selftests/mm/guard-regions.c (renamed from tools/testing/selftests/mm/guard-pages.c)972
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c41
-rwxr-xr-xtools/testing/selftests/mm/hugetlb_reparenting_test.sh2
-rw-r--r--tools/testing/selftests/mm/map_populate.c5
-rw-r--r--tools/testing/selftests/mm/mlock-random-test.c4
-rw-r--r--tools/testing/selftests/mm/mlock2.h8
-rw-r--r--tools/testing/selftests/mm/pkey-powerpc.h14
-rw-r--r--tools/testing/selftests/mm/pkey_util.c1
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh95
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c106
-rw-r--r--tools/testing/selftests/mm/thuge-gen.c4
-rw-r--r--tools/testing/selftests/mm/uffd-common.c12
-rw-r--r--tools/testing/selftests/mm/uffd-common.h2
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c42
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c7
-rw-r--r--tools/testing/selftests/mm/uffd-wp-mremap.c5
-rwxr-xr-xtools/testing/selftests/mm/va_high_addr_switch.sh28
-rw-r--r--tools/testing/selftests/mm/vm_util.h19
-rw-r--r--tools/testing/selftests/mseal_system_mappings/.gitignore2
-rw-r--r--tools/testing/selftests/mseal_system_mappings/Makefile6
-rw-r--r--tools/testing/selftests/mseal_system_mappings/config1
-rw-r--r--tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c119
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile3
-rwxr-xr-xtools/testing/selftests/net/amt.sh20
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh34
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh96
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_taprio.sh421
-rw-r--r--tools/testing/selftests/net/forwarding/tsn_lib.sh26
-rwxr-xr-xtools/testing/selftests/net/gre_ipv6_lladdr.sh177
-rw-r--r--tools/testing/selftests/net/lib.sh25
-rw-r--r--tools/testing/selftests/net/mptcp/.gitignore1
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh5
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c11
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh18
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh39
-rwxr-xr-xtools/testing/selftests/net/netns-name.sh13
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.py4
-rw-r--r--tools/testing/selftests/net/skf_net_off.c244
-rwxr-xr-xtools/testing/selftests/net/skf_net_off.sh30
-rw-r--r--tools/testing/selftests/net/tcp_ao/self-connect.c3
-rw-r--r--tools/testing/selftests/net/tls.c36
-rwxr-xr-xtools/testing/selftests/net/udpgro_bench.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh2
-rwxr-xr-xtools/testing/selftests/net/veth.sh2
-rw-r--r--tools/testing/selftests/net/xdp_dummy.bpf.c13
-rw-r--r--tools/testing/selftests/pcie_bwctrl/Makefile3
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h8
-rw-r--r--tools/testing/selftests/riscv/hwprobe/cbo.c66
-rw-r--r--tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c10
-rw-r--r--tools/testing/selftests/rtc/.gitignore1
-rw-r--r--tools/testing/selftests/rtc/Makefile2
-rw-r--r--tools/testing/selftests/rtc/rtctest.c19
-rw-r--r--tools/testing/selftests/rtc/setdate.c77
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/nat.json14
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/actions.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json449
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json36
-rw-r--r--tools/testing/selftests/tpm2/.gitignore3
-rwxr-xr-xtools/testing/selftests/tpm2/test_smoke.sh2
-rw-r--r--tools/testing/selftests/ublk/Makefile15
-rw-r--r--tools/testing/selftests/ublk/fault_inject.c98
-rw-r--r--tools/testing/selftests/ublk/kublk.c364
-rw-r--r--tools/testing/selftests/ublk/kublk.h51
-rw-r--r--tools/testing/selftests/ublk/null.c11
-rw-r--r--tools/testing/selftests/ublk/stripe.c97
-rwxr-xr-xtools/testing/selftests/ublk/test_common.sh152
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_02.sh44
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_03.sh28
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_04.sh40
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_05.sh44
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_06.sh41
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_07.sh28
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_01.sh22
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_02.sh8
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_03.sh22
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_04.sh9
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_05.sh26
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_01.sh45
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_02.sh45
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_03.sh38
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_04.sh37
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_05.sh64
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_01.sh26
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_02.sh13
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_03.sh26
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_04.sh21
-rw-r--r--tools/testing/selftests/x86/Makefile3
-rw-r--r--tools/testing/selftests/x86/apx.c10
-rw-r--r--tools/testing/selftests/x86/lam.c9
-rw-r--r--tools/testing/selftests/x86/test_mremap_vdso.c43
-rw-r--r--tools/testing/selftests/x86/xstate.c3
-rw-r--r--tools/testing/selftests/x86/xstate.h2
-rw-r--r--tools/testing/shared/interval_tree-shim.c5
-rw-r--r--tools/testing/shared/linux.c4
-rw-r--r--tools/testing/shared/linux/cleanup.h2
-rw-r--r--tools/testing/shared/linux/interval_tree.h7
-rw-r--r--tools/testing/shared/linux/interval_tree_generic.h2
-rw-r--r--tools/testing/shared/linux/rbtree.h8
-rw-r--r--tools/testing/shared/linux/rbtree_augmented.h7
-rw-r--r--tools/testing/shared/linux/rbtree_types.h8
-rw-r--r--tools/testing/shared/rbtree-shim.c6
-rw-r--r--tools/testing/vma/linux/atomic.h5
-rw-r--r--tools/testing/vma/vma.c105
-rw-r--r--tools/testing/vma/vma_internal.h131
-rw-r--r--tools/virtio/linux/compiler.h25
-rw-r--r--tools/virtio/linux/dma-mapping.h13
-rw-r--r--tools/virtio/linux/module.h7
273 files changed, 9149 insertions, 2690 deletions
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 6d44f8c8a18f..af9d9acaf997 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -43,9 +43,6 @@
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_DIRTY_LOG_PAGE_OFFSET 64
-#define KVM_REG_SIZE(id) \
- (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
-
struct kvm_regs {
struct user_pt_regs regs; /* sp = sp_el0 */
@@ -108,6 +105,7 @@ struct kvm_regs {
#define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */
#define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */
#define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */
+#define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */
struct kvm_vcpu_init {
__u32 target;
@@ -418,6 +416,7 @@ enum {
#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
+#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h
index 9306726337fe..df36f23876e8 100644
--- a/tools/arch/arm64/include/uapi/asm/unistd.h
+++ b/tools/arch/arm64/include/uapi/asm/unistd.h
@@ -1,24 +1,2 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#define __ARCH_WANT_RENAMEAT
-#define __ARCH_WANT_NEW_STAT
-#define __ARCH_WANT_SET_GET_RLIMIT
-#define __ARCH_WANT_TIME32_SYSCALLS
-#define __ARCH_WANT_MEMFD_SECRET
-
-#include <asm-generic/unistd.h>
+#include <asm/unistd_64.h>
diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd/ibs.h
index cb1740bc3da2..300b6e0765b2 100644
--- a/tools/arch/x86/include/asm/amd-ibs.h
+++ b/tools/arch/x86/include/asm/amd/ibs.h
@@ -4,7 +4,7 @@
* 55898 Rev 0.35 - Feb 5, 2021
*/
-#include "msr-index.h"
+#include "../msr-index.h"
/* IBS_OP_DATA2 DataSrc */
#define IBS_DATA_SRC_LOC_CACHE 2
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 9e3fa7942e7d..bc81b9d1aeca 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -75,8 +75,8 @@
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */
#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */
#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */
-#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */
-#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */
+/* Free ( 3*32+ 6) */
+/* Free ( 3*32+ 7) */
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */
#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */
#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */
@@ -329,6 +329,7 @@
#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */
+#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */
#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */
#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */
#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
@@ -377,6 +378,7 @@
#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */
#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */
#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */
+#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/
@@ -434,15 +436,18 @@
#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */
/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */
-#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */
-#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */
+#define X86_FEATURE_SME (19*32+ 0) /* "sme" Secure Memory Encryption */
+#define X86_FEATURE_SEV (19*32+ 1) /* "sev" Secure Encrypted Virtualization */
#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */
-#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */
-#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */
+#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */
-#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */
-#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */
+#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */
+#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
+#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */
+#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */
#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
+#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
@@ -455,6 +460,11 @@
#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */
+#define X86_FEATURE_SRSO_USER_KERNEL_NO (20*32+30) /* CPU is not affected by SRSO across user/kernel boundaries */
+#define X86_FEATURE_SRSO_BP_SPEC_REDUCE (20*32+31) /*
+ * BP_CFG[BpSpecReduce] can be used to mitigate SRSO for VMs.
+ * (SRSO_MSR_FIX in the official doc).
+ */
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
@@ -466,10 +476,11 @@
#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */
#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */
#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */
-#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
-#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */
-#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
-#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */
+#define X86_FEATURE_CLEAR_BHB_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
+#define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */
+#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */
+#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */
+#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */
/*
* BUG word(s)
@@ -508,7 +519,7 @@
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */
-#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
+/* unused, was #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */
#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */
@@ -516,9 +527,10 @@
#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */
/* BUG word 2 */
-#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */
-#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */
-#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
-#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */
-#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */
+#define X86_BUG_SRSO X86_BUG( 1*32+ 0) /* "srso" AMD SRSO bug */
+#define X86_BUG_DIV0 X86_BUG( 1*32+ 1) /* "div0" AMD DIV0 speculation bug */
+#define X86_BUG_RFDS X86_BUG( 1*32+ 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */
+#define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */
+#define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/inat.h b/tools/arch/x86/include/asm/inat.h
index 253690eb3c26..183aa662b165 100644
--- a/tools/arch/x86/include/asm/inat.h
+++ b/tools/arch/x86/include/asm/inat.h
@@ -82,6 +82,7 @@
#define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8))
#define INAT_REX2_VARIANT (1 << (INAT_FLAG_OFFS + 9))
#define INAT_EVEX_SCALABLE (1 << (INAT_FLAG_OFFS + 10))
+#define INAT_INV64 (1 << (INAT_FLAG_OFFS + 11))
/* Attribute making macros for attribute tables */
#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
@@ -242,4 +243,9 @@ static inline int inat_evex_scalable(insn_attr_t attr)
{
return attr & INAT_EVEX_SCALABLE;
}
+
+static inline int inat_is_invalid64(insn_attr_t attr)
+{
+ return attr & INAT_INV64;
+}
#endif
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index dc1c1057f26e..e6134ef2263d 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -397,7 +397,8 @@
#define MSR_IA32_PASID_VALID BIT_ULL(31)
/* DEBUGCTLMSR bits (others vary by model): */
-#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */
+#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT)
#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2)
@@ -610,6 +611,7 @@
#define MSR_AMD_PERF_CTL 0xc0010062
#define MSR_AMD_PERF_STATUS 0xc0010063
#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
+#define MSR_AMD64_GUEST_TSC_FREQ 0xc0010134
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
@@ -646,6 +648,7 @@
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b
#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
+#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
@@ -684,11 +687,12 @@
#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
#define MSR_AMD64_SNP_RESV_BIT 18
#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
-
-#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
-
#define MSR_AMD64_RMP_BASE 0xc0010132
#define MSR_AMD64_RMP_END 0xc0010133
+#define MSR_AMD64_RMP_CFG 0xc0010136
+#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0
+#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT)
+#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8)
#define MSR_SVSM_CAA 0xc001f000
@@ -699,15 +703,17 @@
#define MSR_AMD_CPPC_REQ 0xc00102b3
#define MSR_AMD_CPPC_STATUS 0xc00102b4
-#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff)
-#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff)
-#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff)
-#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff)
+/* Masks for use with MSR_AMD_CPPC_CAP1 */
+#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0)
+#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8)
+#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16)
+#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24)
-#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0)
-#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8)
-#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
-#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
+/* Masks for use with MSR_AMD_CPPC_REQ */
+#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0)
+#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8)
+#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16)
+#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24)
/* AMD Performance Counter Global Status and Control MSRs */
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
@@ -719,6 +725,7 @@
/* Zen4 */
#define MSR_ZEN4_BP_CFG 0xc001102e
+#define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4
#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
/* Fam 19h MSRs */
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 88585c1de416..460306b35a4b 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -559,6 +559,9 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8)
+#define KVM_XEN_MSR_MIN_INDEX 0x40000000u
+#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu
+
struct kvm_xen_hvm_config {
__u32 flags;
__u32 msr;
@@ -925,5 +928,6 @@ struct kvm_hyperv_eventfd {
#define KVM_X86_SEV_VM 2
#define KVM_X86_SEV_ES_VM 3
#define KVM_X86_SNP_VM 4
+#define KVM_X86_TDX_VM 5
#endif /* _ASM_X86_KVM_H */
diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h
index 1814b413fd57..ec1321248dac 100644
--- a/tools/arch/x86/include/uapi/asm/svm.h
+++ b/tools/arch/x86/include/uapi/asm/svm.h
@@ -95,6 +95,7 @@
#define SVM_EXIT_CR14_WRITE_TRAP 0x09e
#define SVM_EXIT_CR15_WRITE_TRAP 0x09f
#define SVM_EXIT_INVPCID 0x0a2
+#define SVM_EXIT_IDLE_HLT 0x0a6
#define SVM_EXIT_NPF 0x400
#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
@@ -224,6 +225,7 @@
{ SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \
{ SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \
{ SVM_EXIT_INVPCID, "invpcid" }, \
+ { SVM_EXIT_IDLE_HLT, "idle-halt" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \
diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv
index d751eb8585d0..8d925ce9750f 100644
--- a/tools/arch/x86/kcpuid/cpuid.csv
+++ b/tools/arch/x86/kcpuid/cpuid.csv
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: CC0-1.0
-# Generator: x86-cpuid-db v1.0
+# Generator: x86-cpuid-db v2.4
#
# Auto-generated file.
@@ -12,297 +12,298 @@
# Leaf 0H
# Maximum standard leaf number + CPU vendor string
- 0, 0, eax, 31:0, max_std_leaf , Highest cpuid standard leaf supported
- 0, 0, ebx, 31:0, cpu_vendorid_0 , CPU vendor ID string bytes 0 - 3
- 0, 0, ecx, 31:0, cpu_vendorid_2 , CPU vendor ID string bytes 8 - 11
- 0, 0, edx, 31:0, cpu_vendorid_1 , CPU vendor ID string bytes 4 - 7
+ 0x0, 0, eax, 31:0, max_std_leaf , Highest standard CPUID leaf supported
+ 0x0, 0, ebx, 31:0, cpu_vendorid_0 , CPU vendor ID string bytes 0 - 3
+ 0x0, 0, ecx, 31:0, cpu_vendorid_2 , CPU vendor ID string bytes 8 - 11
+ 0x0, 0, edx, 31:0, cpu_vendorid_1 , CPU vendor ID string bytes 4 - 7
# Leaf 1H
# CPU FMS (Family/Model/Stepping) + standard feature flags
- 1, 0, eax, 3:0, stepping , Stepping ID
- 1, 0, eax, 7:4, base_model , Base CPU model ID
- 1, 0, eax, 11:8, base_family_id , Base CPU family ID
- 1, 0, eax, 13:12, cpu_type , CPU type
- 1, 0, eax, 19:16, ext_model , Extended CPU model ID
- 1, 0, eax, 27:20, ext_family , Extended CPU family ID
- 1, 0, ebx, 7:0, brand_id , Brand index
- 1, 0, ebx, 15:8, clflush_size , CLFLUSH instruction cache line size
- 1, 0, ebx, 23:16, n_logical_cpu , Logical CPU (HW threads) count
- 1, 0, ebx, 31:24, local_apic_id , Initial local APIC physical ID
- 1, 0, ecx, 0, pni , Streaming SIMD Extensions 3 (SSE3)
- 1, 0, ecx, 1, pclmulqdq , PCLMULQDQ instruction support
- 1, 0, ecx, 2, dtes64 , 64-bit DS save area
- 1, 0, ecx, 3, monitor , MONITOR/MWAIT support
- 1, 0, ecx, 4, ds_cpl , CPL Qualified Debug Store
- 1, 0, ecx, 5, vmx , Virtual Machine Extensions
- 1, 0, ecx, 6, smx , Safer Mode Extensions
- 1, 0, ecx, 7, est , Enhanced Intel SpeedStep
- 1, 0, ecx, 8, tm2 , Thermal Monitor 2
- 1, 0, ecx, 9, ssse3 , Supplemental SSE3
- 1, 0, ecx, 10, cid , L1 Context ID
- 1, 0, ecx, 11, sdbg , Sillicon Debug
- 1, 0, ecx, 12, fma , FMA extensions using YMM state
- 1, 0, ecx, 13, cx16 , CMPXCHG16B instruction support
- 1, 0, ecx, 14, xtpr , xTPR Update Control
- 1, 0, ecx, 15, pdcm , Perfmon and Debug Capability
- 1, 0, ecx, 17, pcid , Process-context identifiers
- 1, 0, ecx, 18, dca , Direct Cache Access
- 1, 0, ecx, 19, sse4_1 , SSE4.1
- 1, 0, ecx, 20, sse4_2 , SSE4.2
- 1, 0, ecx, 21, x2apic , X2APIC support
- 1, 0, ecx, 22, movbe , MOVBE instruction support
- 1, 0, ecx, 23, popcnt , POPCNT instruction support
- 1, 0, ecx, 24, tsc_deadline_timer , APIC timer one-shot operation
- 1, 0, ecx, 25, aes , AES instructions
- 1, 0, ecx, 26, xsave , XSAVE (and related instructions) support
- 1, 0, ecx, 27, osxsave , XSAVE (and related instructions) are enabled by OS
- 1, 0, ecx, 28, avx , AVX instructions support
- 1, 0, ecx, 29, f16c , Half-precision floating-point conversion support
- 1, 0, ecx, 30, rdrand , RDRAND instruction support
- 1, 0, ecx, 31, guest_status , System is running as guest; (para-)virtualized system
- 1, 0, edx, 0, fpu , Floating-Point Unit on-chip (x87)
- 1, 0, edx, 1, vme , Virtual-8086 Mode Extensions
- 1, 0, edx, 2, de , Debugging Extensions
- 1, 0, edx, 3, pse , Page Size Extension
- 1, 0, edx, 4, tsc , Time Stamp Counter
- 1, 0, edx, 5, msr , Model-Specific Registers (RDMSR and WRMSR support)
- 1, 0, edx, 6, pae , Physical Address Extensions
- 1, 0, edx, 7, mce , Machine Check Exception
- 1, 0, edx, 8, cx8 , CMPXCHG8B instruction
- 1, 0, edx, 9, apic , APIC on-chip
- 1, 0, edx, 11, sep , SYSENTER, SYSEXIT, and associated MSRs
- 1, 0, edx, 12, mtrr , Memory Type Range Registers
- 1, 0, edx, 13, pge , Page Global Extensions
- 1, 0, edx, 14, mca , Machine Check Architecture
- 1, 0, edx, 15, cmov , Conditional Move Instruction
- 1, 0, edx, 16, pat , Page Attribute Table
- 1, 0, edx, 17, pse36 , Page Size Extension (36-bit)
- 1, 0, edx, 18, pn , Processor Serial Number
- 1, 0, edx, 19, clflush , CLFLUSH instruction
- 1, 0, edx, 21, dts , Debug Store
- 1, 0, edx, 22, acpi , Thermal monitor and clock control
- 1, 0, edx, 23, mmx , MMX instructions
- 1, 0, edx, 24, fxsr , FXSAVE and FXRSTOR instructions
- 1, 0, edx, 25, sse , SSE instructions
- 1, 0, edx, 26, sse2 , SSE2 instructions
- 1, 0, edx, 27, ss , Self Snoop
- 1, 0, edx, 28, ht , Hyper-threading
- 1, 0, edx, 29, tm , Thermal Monitor
- 1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now resreved
- 1, 0, edx, 31, pbe , Pending Break Enable
+ 0x1, 0, eax, 3:0, stepping , Stepping ID
+ 0x1, 0, eax, 7:4, base_model , Base CPU model ID
+ 0x1, 0, eax, 11:8, base_family_id , Base CPU family ID
+ 0x1, 0, eax, 13:12, cpu_type , CPU type
+ 0x1, 0, eax, 19:16, ext_model , Extended CPU model ID
+ 0x1, 0, eax, 27:20, ext_family , Extended CPU family ID
+ 0x1, 0, ebx, 7:0, brand_id , Brand index
+ 0x1, 0, ebx, 15:8, clflush_size , CLFLUSH instruction cache line size
+ 0x1, 0, ebx, 23:16, n_logical_cpu , Logical CPU count
+ 0x1, 0, ebx, 31:24, local_apic_id , Initial local APIC physical ID
+ 0x1, 0, ecx, 0, pni , Streaming SIMD Extensions 3 (SSE3)
+ 0x1, 0, ecx, 1, pclmulqdq , PCLMULQDQ instruction support
+ 0x1, 0, ecx, 2, dtes64 , 64-bit DS save area
+ 0x1, 0, ecx, 3, monitor , MONITOR/MWAIT support
+ 0x1, 0, ecx, 4, ds_cpl , CPL Qualified Debug Store
+ 0x1, 0, ecx, 5, vmx , Virtual Machine Extensions
+ 0x1, 0, ecx, 6, smx , Safer Mode Extensions
+ 0x1, 0, ecx, 7, est , Enhanced Intel SpeedStep
+ 0x1, 0, ecx, 8, tm2 , Thermal Monitor 2
+ 0x1, 0, ecx, 9, ssse3 , Supplemental SSE3
+ 0x1, 0, ecx, 10, cid , L1 Context ID
+ 0x1, 0, ecx, 11, sdbg , Silicon Debug
+ 0x1, 0, ecx, 12, fma , FMA extensions using YMM state
+ 0x1, 0, ecx, 13, cx16 , CMPXCHG16B instruction support
+ 0x1, 0, ecx, 14, xtpr , xTPR Update Control
+ 0x1, 0, ecx, 15, pdcm , Perfmon and Debug Capability
+ 0x1, 0, ecx, 17, pcid , Process-context identifiers
+ 0x1, 0, ecx, 18, dca , Direct Cache Access
+ 0x1, 0, ecx, 19, sse4_1 , SSE4.1
+ 0x1, 0, ecx, 20, sse4_2 , SSE4.2
+ 0x1, 0, ecx, 21, x2apic , X2APIC support
+ 0x1, 0, ecx, 22, movbe , MOVBE instruction support
+ 0x1, 0, ecx, 23, popcnt , POPCNT instruction support
+ 0x1, 0, ecx, 24, tsc_deadline_timer , APIC timer one-shot operation
+ 0x1, 0, ecx, 25, aes , AES instructions
+ 0x1, 0, ecx, 26, xsave , XSAVE (and related instructions) support
+ 0x1, 0, ecx, 27, osxsave , XSAVE (and related instructions) are enabled by OS
+ 0x1, 0, ecx, 28, avx , AVX instructions support
+ 0x1, 0, ecx, 29, f16c , Half-precision floating-point conversion support
+ 0x1, 0, ecx, 30, rdrand , RDRAND instruction support
+ 0x1, 0, ecx, 31, guest_status , System is running as guest; (para-)virtualized system
+ 0x1, 0, edx, 0, fpu , Floating-Point Unit on-chip (x87)
+ 0x1, 0, edx, 1, vme , Virtual-8086 Mode Extensions
+ 0x1, 0, edx, 2, de , Debugging Extensions
+ 0x1, 0, edx, 3, pse , Page Size Extension
+ 0x1, 0, edx, 4, tsc , Time Stamp Counter
+ 0x1, 0, edx, 5, msr , Model-Specific Registers (RDMSR and WRMSR support)
+ 0x1, 0, edx, 6, pae , Physical Address Extensions
+ 0x1, 0, edx, 7, mce , Machine Check Exception
+ 0x1, 0, edx, 8, cx8 , CMPXCHG8B instruction
+ 0x1, 0, edx, 9, apic , APIC on-chip
+ 0x1, 0, edx, 11, sep , SYSENTER, SYSEXIT, and associated MSRs
+ 0x1, 0, edx, 12, mtrr , Memory Type Range Registers
+ 0x1, 0, edx, 13, pge , Page Global Extensions
+ 0x1, 0, edx, 14, mca , Machine Check Architecture
+ 0x1, 0, edx, 15, cmov , Conditional Move Instruction
+ 0x1, 0, edx, 16, pat , Page Attribute Table
+ 0x1, 0, edx, 17, pse36 , Page Size Extension (36-bit)
+ 0x1, 0, edx, 18, pn , Processor Serial Number
+ 0x1, 0, edx, 19, clflush , CLFLUSH instruction
+ 0x1, 0, edx, 21, dts , Debug Store
+ 0x1, 0, edx, 22, acpi , Thermal monitor and clock control
+ 0x1, 0, edx, 23, mmx , MMX instructions
+ 0x1, 0, edx, 24, fxsr , FXSAVE and FXRSTOR instructions
+ 0x1, 0, edx, 25, sse , SSE instructions
+ 0x1, 0, edx, 26, sse2 , SSE2 instructions
+ 0x1, 0, edx, 27, ss , Self Snoop
+ 0x1, 0, edx, 28, ht , Hyper-threading
+ 0x1, 0, edx, 29, tm , Thermal Monitor
+ 0x1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now reserved
+ 0x1, 0, edx, 31, pbe , Pending Break Enable
# Leaf 2H
# Intel cache and TLB information one-byte descriptors
- 2, 0, eax, 7:0, iteration_count , Number of times this CPUD leaf must be queried
- 2, 0, eax, 15:8, desc1 , Descriptor #1
- 2, 0, eax, 23:16, desc2 , Descriptor #2
- 2, 0, eax, 30:24, desc3 , Descriptor #3
- 2, 0, eax, 31, eax_invalid , Descriptors 1-3 are invalid if set
- 2, 0, ebx, 7:0, desc4 , Descriptor #4
- 2, 0, ebx, 15:8, desc5 , Descriptor #5
- 2, 0, ebx, 23:16, desc6 , Descriptor #6
- 2, 0, ebx, 30:24, desc7 , Descriptor #7
- 2, 0, ebx, 31, ebx_invalid , Descriptors 4-7 are invalid if set
- 2, 0, ecx, 7:0, desc8 , Descriptor #8
- 2, 0, ecx, 15:8, desc9 , Descriptor #9
- 2, 0, ecx, 23:16, desc10 , Descriptor #10
- 2, 0, ecx, 30:24, desc11 , Descriptor #11
- 2, 0, ecx, 31, ecx_invalid , Descriptors 8-11 are invalid if set
- 2, 0, edx, 7:0, desc12 , Descriptor #12
- 2, 0, edx, 15:8, desc13 , Descriptor #13
- 2, 0, edx, 23:16, desc14 , Descriptor #14
- 2, 0, edx, 30:24, desc15 , Descriptor #15
- 2, 0, edx, 31, edx_invalid , Descriptors 12-15 are invalid if set
+ 0x2, 0, eax, 7:0, iteration_count , Number of times this leaf must be queried
+ 0x2, 0, eax, 15:8, desc1 , Descriptor #1
+ 0x2, 0, eax, 23:16, desc2 , Descriptor #2
+ 0x2, 0, eax, 30:24, desc3 , Descriptor #3
+ 0x2, 0, eax, 31, eax_invalid , Descriptors 1-3 are invalid if set
+ 0x2, 0, ebx, 7:0, desc4 , Descriptor #4
+ 0x2, 0, ebx, 15:8, desc5 , Descriptor #5
+ 0x2, 0, ebx, 23:16, desc6 , Descriptor #6
+ 0x2, 0, ebx, 30:24, desc7 , Descriptor #7
+ 0x2, 0, ebx, 31, ebx_invalid , Descriptors 4-7 are invalid if set
+ 0x2, 0, ecx, 7:0, desc8 , Descriptor #8
+ 0x2, 0, ecx, 15:8, desc9 , Descriptor #9
+ 0x2, 0, ecx, 23:16, desc10 , Descriptor #10
+ 0x2, 0, ecx, 30:24, desc11 , Descriptor #11
+ 0x2, 0, ecx, 31, ecx_invalid , Descriptors 8-11 are invalid if set
+ 0x2, 0, edx, 7:0, desc12 , Descriptor #12
+ 0x2, 0, edx, 15:8, desc13 , Descriptor #13
+ 0x2, 0, edx, 23:16, desc14 , Descriptor #14
+ 0x2, 0, edx, 30:24, desc15 , Descriptor #15
+ 0x2, 0, edx, 31, edx_invalid , Descriptors 12-15 are invalid if set
# Leaf 4H
# Intel deterministic cache parameters
- 4, 31:0, eax, 4:0, cache_type , Cache type field
- 4, 31:0, eax, 7:5, cache_level , Cache level (1-based)
- 4, 31:0, eax, 8, cache_self_init , Self-initialializing cache level
- 4, 31:0, eax, 9, fully_associative , Fully-associative cache
- 4, 31:0, eax, 25:14, num_threads_sharing , Number logical CPUs sharing this cache
- 4, 31:0, eax, 31:26, num_cores_on_die , Number of cores in the physical package
- 4, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based)
- 4, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based)
- 4, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based)
- 4, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based)
- 4, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches
- 4, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches
- 4, 31:0, edx, 2, complex_indexing , Not a direct-mapped cache (complex function)
+ 0x4, 31:0, eax, 4:0, cache_type , Cache type field
+ 0x4, 31:0, eax, 7:5, cache_level , Cache level (1-based)
+ 0x4, 31:0, eax, 8, cache_self_init , Self-initializing cache level
+ 0x4, 31:0, eax, 9, fully_associative , Fully-associative cache
+ 0x4, 31:0, eax, 25:14, num_threads_sharing , Number logical CPUs sharing this cache
+ 0x4, 31:0, eax, 31:26, num_cores_on_die , Number of cores in the physical package
+ 0x4, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based)
+ 0x4, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based)
+ 0x4, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based)
+ 0x4, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based)
+ 0x4, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches
+ 0x4, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches
+ 0x4, 31:0, edx, 2, complex_indexing , Not a direct-mapped cache (complex function)
# Leaf 5H
# MONITOR/MWAIT instructions enumeration
- 5, 0, eax, 15:0, min_mon_size , Smallest monitor-line size, in bytes
- 5, 0, ebx, 15:0, max_mon_size , Largest monitor-line size, in bytes
- 5, 0, ecx, 0, mwait_ext , Enumeration of MONITOR/MWAIT extensions is supported
- 5, 0, ecx, 1, mwait_irq_break , Interrupts as a break-event for MWAIT is supported
- 5, 0, edx, 3:0, n_c0_substates , Number of C0 sub C-states supported using MWAIT
- 5, 0, edx, 7:4, n_c1_substates , Number of C1 sub C-states supported using MWAIT
- 5, 0, edx, 11:8, n_c2_substates , Number of C2 sub C-states supported using MWAIT
- 5, 0, edx, 15:12, n_c3_substates , Number of C3 sub C-states supported using MWAIT
- 5, 0, edx, 19:16, n_c4_substates , Number of C4 sub C-states supported using MWAIT
- 5, 0, edx, 23:20, n_c5_substates , Number of C5 sub C-states supported using MWAIT
- 5, 0, edx, 27:24, n_c6_substates , Number of C6 sub C-states supported using MWAIT
- 5, 0, edx, 31:28, n_c7_substates , Number of C7 sub C-states supported using MWAIT
+ 0x5, 0, eax, 15:0, min_mon_size , Smallest monitor-line size, in bytes
+ 0x5, 0, ebx, 15:0, max_mon_size , Largest monitor-line size, in bytes
+ 0x5, 0, ecx, 0, mwait_ext , Enumeration of MONITOR/MWAIT extensions is supported
+ 0x5, 0, ecx, 1, mwait_irq_break , Interrupts as a break-event for MWAIT is supported
+ 0x5, 0, edx, 3:0, n_c0_substates , Number of C0 sub C-states supported using MWAIT
+ 0x5, 0, edx, 7:4, n_c1_substates , Number of C1 sub C-states supported using MWAIT
+ 0x5, 0, edx, 11:8, n_c2_substates , Number of C2 sub C-states supported using MWAIT
+ 0x5, 0, edx, 15:12, n_c3_substates , Number of C3 sub C-states supported using MWAIT
+ 0x5, 0, edx, 19:16, n_c4_substates , Number of C4 sub C-states supported using MWAIT
+ 0x5, 0, edx, 23:20, n_c5_substates , Number of C5 sub C-states supported using MWAIT
+ 0x5, 0, edx, 27:24, n_c6_substates , Number of C6 sub C-states supported using MWAIT
+ 0x5, 0, edx, 31:28, n_c7_substates , Number of C7 sub C-states supported using MWAIT
# Leaf 6H
# Thermal and Power Management enumeration
- 6, 0, eax, 0, dtherm , Digital temprature sensor
- 6, 0, eax, 1, turbo_boost , Intel Turbo Boost
- 6, 0, eax, 2, arat , Always-Running APIC Timer (not affected by p-state)
- 6, 0, eax, 4, pln , Power Limit Notification (PLN) event
- 6, 0, eax, 5, ecmd , Clock modulation duty cycle extension
- 6, 0, eax, 6, pts , Package thermal management
- 6, 0, eax, 7, hwp , HWP (Hardware P-states) base registers are supported
- 6, 0, eax, 8, hwp_notify , HWP notification (IA32_HWP_INTERRUPT MSR)
- 6, 0, eax, 9, hwp_act_window , HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported
- 6, 0, eax, 10, hwp_epp , HWP Energy Performance Preference
- 6, 0, eax, 11, hwp_pkg_req , HWP Package Level Request
- 6, 0, eax, 13, hdc_base_regs , HDC base registers are supported
- 6, 0, eax, 14, turbo_boost_3_0 , Intel Turbo Boost Max 3.0
- 6, 0, eax, 15, hwp_capabilities , HWP Highest Performance change
- 6, 0, eax, 16, hwp_peci_override , HWP PECI override
- 6, 0, eax, 17, hwp_flexible , Flexible HWP
- 6, 0, eax, 18, hwp_fast , IA32_HWP_REQUEST MSR fast access mode
- 6, 0, eax, 19, hfi , HW_FEEDBACK MSRs supported
- 6, 0, eax, 20, hwp_ignore_idle , Ignoring idle logical CPU HWP req is supported
- 6, 0, eax, 23, thread_director , Intel thread director support
- 6, 0, eax, 24, therm_interrupt_bit25 , IA32_THERM_INTERRUPT MSR bit 25 is supported
- 6, 0, ebx, 3:0, n_therm_thresholds , Digital thermometer thresholds
- 6, 0, ecx, 0, aperfmperf , MPERF/APERF MSRs (effective frequency interface)
- 6, 0, ecx, 3, epb , IA32_ENERGY_PERF_BIAS MSR support
- 6, 0, ecx, 15:8, thrd_director_nclasses , Number of classes, Intel thread director
- 6, 0, edx, 0, perfcap_reporting , Performance capability reporting
- 6, 0, edx, 1, encap_reporting , Energy efficiency capability reporting
- 6, 0, edx, 11:8, feedback_sz , HW feedback interface struct size, in 4K pages
- 6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU index @ HW feedback struct, 0-based
+ 0x6, 0, eax, 0, dtherm , Digital temperature sensor
+ 0x6, 0, eax, 1, turbo_boost , Intel Turbo Boost
+ 0x6, 0, eax, 2, arat , Always-Running APIC Timer (not affected by p-state)
+ 0x6, 0, eax, 4, pln , Power Limit Notification (PLN) event
+ 0x6, 0, eax, 5, ecmd , Clock modulation duty cycle extension
+ 0x6, 0, eax, 6, pts , Package thermal management
+ 0x6, 0, eax, 7, hwp , HWP (Hardware P-states) base registers are supported
+ 0x6, 0, eax, 8, hwp_notify , HWP notification (IA32_HWP_INTERRUPT MSR)
+ 0x6, 0, eax, 9, hwp_act_window , HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported
+ 0x6, 0, eax, 10, hwp_epp , HWP Energy Performance Preference
+ 0x6, 0, eax, 11, hwp_pkg_req , HWP Package Level Request
+ 0x6, 0, eax, 13, hdc_base_regs , HDC base registers are supported
+ 0x6, 0, eax, 14, turbo_boost_3_0 , Intel Turbo Boost Max 3.0
+ 0x6, 0, eax, 15, hwp_capabilities , HWP Highest Performance change
+ 0x6, 0, eax, 16, hwp_peci_override , HWP PECI override
+ 0x6, 0, eax, 17, hwp_flexible , Flexible HWP
+ 0x6, 0, eax, 18, hwp_fast , IA32_HWP_REQUEST MSR fast access mode
+ 0x6, 0, eax, 19, hfi , HW_FEEDBACK MSRs supported
+ 0x6, 0, eax, 20, hwp_ignore_idle , Ignoring idle logical CPU HWP req is supported
+ 0x6, 0, eax, 23, thread_director , Intel thread director support
+ 0x6, 0, eax, 24, therm_interrupt_bit25 , IA32_THERM_INTERRUPT MSR bit 25 is supported
+ 0x6, 0, ebx, 3:0, n_therm_thresholds , Digital thermometer thresholds
+ 0x6, 0, ecx, 0, aperfmperf , MPERF/APERF MSRs (effective frequency interface)
+ 0x6, 0, ecx, 3, epb , IA32_ENERGY_PERF_BIAS MSR support
+ 0x6, 0, ecx, 15:8, thrd_director_nclasses , Number of classes, Intel thread director
+ 0x6, 0, edx, 0, perfcap_reporting , Performance capability reporting
+ 0x6, 0, edx, 1, encap_reporting , Energy efficiency capability reporting
+ 0x6, 0, edx, 11:8, feedback_sz , Feedback interface structure size, in 4K pages
+ 0x6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU hardware feedback interface index
# Leaf 7H
# Extended CPU features enumeration
- 7, 0, eax, 31:0, leaf7_n_subleaves , Number of cpuid 0x7 subleaves
- 7, 0, ebx, 0, fsgsbase , FSBASE/GSBASE read/write support
- 7, 0, ebx, 1, tsc_adjust , IA32_TSC_ADJUST MSR supported
- 7, 0, ebx, 2, sgx , Intel SGX (Software Guard Extensions)
- 7, 0, ebx, 3, bmi1 , Bit manipulation extensions group 1
- 7, 0, ebx, 4, hle , Hardware Lock Elision
- 7, 0, ebx, 5, avx2 , AVX2 instruction set
- 7, 0, ebx, 6, fdp_excptn_only , FPU Data Pointer updated only on x87 exceptions
- 7, 0, ebx, 7, smep , Supervisor Mode Execution Protection
- 7, 0, ebx, 8, bmi2 , Bit manipulation extensions group 2
- 7, 0, ebx, 9, erms , Enhanced REP MOVSB/STOSB
- 7, 0, ebx, 10, invpcid , INVPCID instruction (Invalidate Processor Context ID)
- 7, 0, ebx, 11, rtm , Intel restricted transactional memory
- 7, 0, ebx, 12, cqm , Intel RDT-CMT / AMD Platform-QoS cache monitoring
- 7, 0, ebx, 13, zero_fcs_fds , Deprecated FPU CS/DS (stored as zero)
- 7, 0, ebx, 14, mpx , Intel memory protection extensions
- 7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcemeent
- 7, 0, ebx, 16, avx512f , AVX-512 foundation instructions
- 7, 0, ebx, 17, avx512dq , AVX-512 double/quadword instructions
- 7, 0, ebx, 18, rdseed , RDSEED instruction
- 7, 0, ebx, 19, adx , ADCX/ADOX instructions
- 7, 0, ebx, 20, smap , Supervisor mode access prevention
- 7, 0, ebx, 21, avx512ifma , AVX-512 integer fused multiply add
- 7, 0, ebx, 23, clflushopt , CLFLUSHOPT instruction
- 7, 0, ebx, 24, clwb , CLWB instruction
- 7, 0, ebx, 25, intel_pt , Intel processor trace
- 7, 0, ebx, 26, avx512pf , AVX-512 prefetch instructions
- 7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instrs
- 7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instrs
- 7, 0, ebx, 29, sha_ni , SHA/SHA256 instructions
- 7, 0, ebx, 30, avx512bw , AVX-512 BW (byte/word granular) instructions
- 7, 0, ebx, 31, avx512vl , AVX-512 VL (128/256 vector length) extensions
- 7, 0, ecx, 0, prefetchwt1 , PREFETCHWT1 (Intel Xeon Phi only)
- 7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instrs
- 7, 0, ecx, 2, umip , User mode instruction protection
- 7, 0, ecx, 3, pku , Protection keys for user-space
- 7, 0, ecx, 4, ospke , OS protection keys enable
- 7, 0, ecx, 5, waitpkg , WAITPKG instructions
- 7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instrs group 2
- 7, 0, ecx, 7, cet_ss , CET shadow stack features
- 7, 0, ecx, 8, gfni , Galois field new instructions
- 7, 0, ecx, 9, vaes , Vector AES instrs
- 7, 0, ecx, 10, vpclmulqdq , VPCLMULQDQ 256-bit instruction support
- 7, 0, ecx, 11, avx512_vnni , Vector neural network instructions
- 7, 0, ecx, 12, avx512_bitalg , AVX-512 bit count/shiffle
- 7, 0, ecx, 13, tme , Intel total memory encryption
- 7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DW/QW
- 7, 0, ecx, 16, la57 , 57-bit linear addreses (five-level paging)
- 7, 0, ecx, 21:17, mawau_val_lm , BNDLDX/BNDSTX MAWAU value in 64-bit mode
- 7, 0, ecx, 22, rdpid , RDPID instruction
- 7, 0, ecx, 23, key_locker , Intel key locker support
- 7, 0, ecx, 24, bus_lock_detect , OS bus-lock detection
- 7, 0, ecx, 25, cldemote , CLDEMOTE instruction
- 7, 0, ecx, 27, movdiri , MOVDIRI instruction
- 7, 0, ecx, 28, movdir64b , MOVDIR64B instruction
- 7, 0, ecx, 29, enqcmd , Enqueue stores supported (ENQCMD{,S})
- 7, 0, ecx, 30, sgx_lc , Intel SGX launch configuration
- 7, 0, ecx, 31, pks , Protection keys for supervisor-mode pages
- 7, 0, edx, 1, sgx_keys , Intel SGX attestation services
- 7, 0, edx, 2, avx512_4vnniw , AVX-512 neural network instructions
- 7, 0, edx, 3, avx512_4fmaps , AVX-512 multiply accumulation single precision
- 7, 0, edx, 4, fsrm , Fast short REP MOV
- 7, 0, edx, 5, uintr , CPU supports user interrupts
- 7, 0, edx, 8, avx512_vp2intersect , VP2INTERSECT{D,Q} instructions
- 7, 0, edx, 9, srdbs_ctrl , SRBDS mitigation MSR available
- 7, 0, edx, 10, md_clear , VERW MD_CLEAR microcode support
- 7, 0, edx, 11, rtm_always_abort , XBEGIN (RTM transaction) always aborts
- 7, 0, edx, 13, tsx_force_abort , MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported
- 7, 0, edx, 14, serialize , SERIALIZE instruction
- 7, 0, edx, 15, hybrid_cpu , The CPU is identified as a 'hybrid part'
- 7, 0, edx, 16, tsxldtrk , TSX suspend/resume load address tracking
- 7, 0, edx, 18, pconfig , PCONFIG instruction
- 7, 0, edx, 19, arch_lbr , Intel architectural LBRs
- 7, 0, edx, 20, ibt , CET indirect branch tracking
- 7, 0, edx, 22, amx_bf16 , AMX-BF16: tile bfloat16 support
- 7, 0, edx, 23, avx512_fp16 , AVX-512 FP16 instructions
- 7, 0, edx, 24, amx_tile , AMX-TILE: tile architecture support
- 7, 0, edx, 25, amx_int8 , AMX-INT8: tile 8-bit integer support
- 7, 0, edx, 26, spec_ctrl , Speculation Control (IBRS/IBPB: indirect branch restrictions)
- 7, 0, edx, 27, intel_stibp , Single thread indirect branch predictors
- 7, 0, edx, 28, flush_l1d , FLUSH L1D cache: IA32_FLUSH_CMD MSR
- 7, 0, edx, 29, arch_capabilities , Intel IA32_ARCH_CAPABILITIES MSR
- 7, 0, edx, 30, core_capabilities , IA32_CORE_CAPABILITIES MSR
- 7, 0, edx, 31, spec_ctrl_ssbd , Speculative store bypass disable
- 7, 1, eax, 4, avx_vnni , AVX-VNNI instructions
- 7, 1, eax, 5, avx512_bf16 , AVX-512 bFloat16 instructions
- 7, 1, eax, 6, lass , Linear address space separation
- 7, 1, eax, 7, cmpccxadd , CMPccXADD instructions
- 7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: CPUID leaf 0x23 is supported
- 7, 1, eax, 10, fzrm , Fast zero-length REP MOVSB
- 7, 1, eax, 11, fsrs , Fast short REP STOSB
- 7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB
- 7, 1, eax, 17, fred , FRED: Flexible return and event delivery transitions
- 7, 1, eax, 18, lkgs , LKGS: Load 'kernel' (userspace) GS
- 7, 1, eax, 19, wrmsrns , WRMSRNS instr (WRMSR-non-serializing)
- 7, 1, eax, 21, amx_fp16 , AMX-FP16: FP16 tile operations
- 7, 1, eax, 22, hreset , History reset support
- 7, 1, eax, 23, avx_ifma , Integer fused multiply add
- 7, 1, eax, 26, lam , Linear address masking
- 7, 1, eax, 27, rd_wr_msrlist , RDMSRLIST/WRMSRLIST instructions
- 7, 1, ebx, 0, intel_ppin , Protected processor inventory number (PPIN{,_CTL} MSRs)
- 7, 1, edx, 4, avx_vnni_int8 , AVX-VNNI-INT8 instructions
- 7, 1, edx, 5, avx_ne_convert , AVX-NE-CONVERT instructions
- 7, 1, edx, 8, amx_complex , AMX-COMPLEX instructions (starting from Granite Rapids)
- 7, 1, edx, 14, prefetchit_0_1 , PREFETCHIT0/1 instructions
- 7, 1, edx, 18, cet_sss , CET supervisor shadow stacks safe to use
- 7, 2, edx, 0, intel_psfd , Intel predictive store forward disable
- 7, 2, edx, 1, ipred_ctrl , MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S}
- 7, 2, edx, 2, rrsba_ctrl , MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S}
- 7, 2, edx, 3, ddp_ctrl , MSR bit IA32_SPEC_CTRL.DDPD_U
- 7, 2, edx, 4, bhi_ctrl , MSR bit IA32_SPEC_CTRL.BHI_DIS_S
- 7, 2, edx, 5, mcdt_no , MCDT mitigation not needed
- 7, 2, edx, 6, uclock_disable , UC-lock disable is supported
+ 0x7, 0, eax, 31:0, leaf7_n_subleaves , Number of leaf 0x7 subleaves
+ 0x7, 0, ebx, 0, fsgsbase , FSBASE/GSBASE read/write support
+ 0x7, 0, ebx, 1, tsc_adjust , IA32_TSC_ADJUST MSR supported
+ 0x7, 0, ebx, 2, sgx , Intel SGX (Software Guard Extensions)
+ 0x7, 0, ebx, 3, bmi1 , Bit manipulation extensions group 1
+ 0x7, 0, ebx, 4, hle , Hardware Lock Elision
+ 0x7, 0, ebx, 5, avx2 , AVX2 instruction set
+ 0x7, 0, ebx, 6, fdp_excptn_only , FPU Data Pointer updated only on x87 exceptions
+ 0x7, 0, ebx, 7, smep , Supervisor Mode Execution Protection
+ 0x7, 0, ebx, 8, bmi2 , Bit manipulation extensions group 2
+ 0x7, 0, ebx, 9, erms , Enhanced REP MOVSB/STOSB
+ 0x7, 0, ebx, 10, invpcid , INVPCID instruction (Invalidate Processor Context ID)
+ 0x7, 0, ebx, 11, rtm , Intel restricted transactional memory
+ 0x7, 0, ebx, 12, cqm , Intel RDT-CMT / AMD Platform-QoS cache monitoring
+ 0x7, 0, ebx, 13, zero_fcs_fds , Deprecated FPU CS/DS (stored as zero)
+ 0x7, 0, ebx, 14, mpx , Intel memory protection extensions
+ 0x7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcement
+ 0x7, 0, ebx, 16, avx512f , AVX-512 foundation instructions
+ 0x7, 0, ebx, 17, avx512dq , AVX-512 double/quadword instructions
+ 0x7, 0, ebx, 18, rdseed , RDSEED instruction
+ 0x7, 0, ebx, 19, adx , ADCX/ADOX instructions
+ 0x7, 0, ebx, 20, smap , Supervisor mode access prevention
+ 0x7, 0, ebx, 21, avx512ifma , AVX-512 integer fused multiply add
+ 0x7, 0, ebx, 23, clflushopt , CLFLUSHOPT instruction
+ 0x7, 0, ebx, 24, clwb , CLWB instruction
+ 0x7, 0, ebx, 25, intel_pt , Intel processor trace
+ 0x7, 0, ebx, 26, avx512pf , AVX-512 prefetch instructions
+ 0x7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instructions
+ 0x7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instructions
+ 0x7, 0, ebx, 29, sha_ni , SHA/SHA256 instructions
+ 0x7, 0, ebx, 30, avx512bw , AVX-512 byte/word instructions
+ 0x7, 0, ebx, 31, avx512vl , AVX-512 VL (128/256 vector length) extensions
+ 0x7, 0, ecx, 0, prefetchwt1 , PREFETCHWT1 (Intel Xeon Phi only)
+ 0x7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instructions
+ 0x7, 0, ecx, 2, umip , User mode instruction protection
+ 0x7, 0, ecx, 3, pku , Protection keys for user-space
+ 0x7, 0, ecx, 4, ospke , OS protection keys enable
+ 0x7, 0, ecx, 5, waitpkg , WAITPKG instructions
+ 0x7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instructions group 2
+ 0x7, 0, ecx, 7, cet_ss , CET shadow stack features
+ 0x7, 0, ecx, 8, gfni , Galois field new instructions
+ 0x7, 0, ecx, 9, vaes , Vector AES instructions
+ 0x7, 0, ecx, 10, vpclmulqdq , VPCLMULQDQ 256-bit instruction support
+ 0x7, 0, ecx, 11, avx512_vnni , Vector neural network instructions
+ 0x7, 0, ecx, 12, avx512_bitalg , AVX-512 bitwise algorithms
+ 0x7, 0, ecx, 13, tme , Intel total memory encryption
+ 0x7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DWORD/QWORD
+ 0x7, 0, ecx, 16, la57 , 57-bit linear addresses (five-level paging)
+ 0x7, 0, ecx, 21:17, mawau_val_lm , BNDLDX/BNDSTX MAWAU value in 64-bit mode
+ 0x7, 0, ecx, 22, rdpid , RDPID instruction
+ 0x7, 0, ecx, 23, key_locker , Intel key locker support
+ 0x7, 0, ecx, 24, bus_lock_detect , OS bus-lock detection
+ 0x7, 0, ecx, 25, cldemote , CLDEMOTE instruction
+ 0x7, 0, ecx, 27, movdiri , MOVDIRI instruction
+ 0x7, 0, ecx, 28, movdir64b , MOVDIR64B instruction
+ 0x7, 0, ecx, 29, enqcmd , Enqueue stores supported (ENQCMD{,S})
+ 0x7, 0, ecx, 30, sgx_lc , Intel SGX launch configuration
+ 0x7, 0, ecx, 31, pks , Protection keys for supervisor-mode pages
+ 0x7, 0, edx, 1, sgx_keys , Intel SGX attestation services
+ 0x7, 0, edx, 2, avx512_4vnniw , AVX-512 neural network instructions
+ 0x7, 0, edx, 3, avx512_4fmaps , AVX-512 multiply accumulation single precision
+ 0x7, 0, edx, 4, fsrm , Fast short REP MOV
+ 0x7, 0, edx, 5, uintr , CPU supports user interrupts
+ 0x7, 0, edx, 8, avx512_vp2intersect , VP2INTERSECT{D,Q} instructions
+ 0x7, 0, edx, 9, srdbs_ctrl , SRBDS mitigation MSR available
+ 0x7, 0, edx, 10, md_clear , VERW MD_CLEAR microcode support
+ 0x7, 0, edx, 11, rtm_always_abort , XBEGIN (RTM transaction) always aborts
+ 0x7, 0, edx, 13, tsx_force_abort , MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported
+ 0x7, 0, edx, 14, serialize , SERIALIZE instruction
+ 0x7, 0, edx, 15, hybrid_cpu , The CPU is identified as a 'hybrid part'
+ 0x7, 0, edx, 16, tsxldtrk , TSX suspend/resume load address tracking
+ 0x7, 0, edx, 18, pconfig , PCONFIG instruction
+ 0x7, 0, edx, 19, arch_lbr , Intel architectural LBRs
+ 0x7, 0, edx, 20, ibt , CET indirect branch tracking
+ 0x7, 0, edx, 22, amx_bf16 , AMX-BF16: tile bfloat16 support
+ 0x7, 0, edx, 23, avx512_fp16 , AVX-512 FP16 instructions
+ 0x7, 0, edx, 24, amx_tile , AMX-TILE: tile architecture support
+ 0x7, 0, edx, 25, amx_int8 , AMX-INT8: tile 8-bit integer support
+ 0x7, 0, edx, 26, spec_ctrl , Speculation Control (IBRS/IBPB: indirect branch restrictions)
+ 0x7, 0, edx, 27, intel_stibp , Single thread indirect branch predictors
+ 0x7, 0, edx, 28, flush_l1d , FLUSH L1D cache: IA32_FLUSH_CMD MSR
+ 0x7, 0, edx, 29, arch_capabilities , Intel IA32_ARCH_CAPABILITIES MSR
+ 0x7, 0, edx, 30, core_capabilities , IA32_CORE_CAPABILITIES MSR
+ 0x7, 0, edx, 31, spec_ctrl_ssbd , Speculative store bypass disable
+ 0x7, 1, eax, 4, avx_vnni , AVX-VNNI instructions
+ 0x7, 1, eax, 5, avx512_bf16 , AVX-512 bfloat16 instructions
+ 0x7, 1, eax, 6, lass , Linear address space separation
+ 0x7, 1, eax, 7, cmpccxadd , CMPccXADD instructions
+ 0x7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: leaf 0x23 is supported
+ 0x7, 1, eax, 10, fzrm , Fast zero-length REP MOVSB
+ 0x7, 1, eax, 11, fsrs , Fast short REP STOSB
+ 0x7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB
+ 0x7, 1, eax, 17, fred , FRED: Flexible return and event delivery transitions
+ 0x7, 1, eax, 18, lkgs , LKGS: Load 'kernel' (userspace) GS
+ 0x7, 1, eax, 19, wrmsrns , WRMSRNS instruction (WRMSR-non-serializing)
+ 0x7, 1, eax, 20, nmi_src , NMI-source reporting with FRED event data
+ 0x7, 1, eax, 21, amx_fp16 , AMX-FP16: FP16 tile operations
+ 0x7, 1, eax, 22, hreset , History reset support
+ 0x7, 1, eax, 23, avx_ifma , Integer fused multiply add
+ 0x7, 1, eax, 26, lam , Linear address masking
+ 0x7, 1, eax, 27, rd_wr_msrlist , RDMSRLIST/WRMSRLIST instructions
+ 0x7, 1, ebx, 0, intel_ppin , Protected processor inventory number (PPIN{,_CTL} MSRs)
+ 0x7, 1, edx, 4, avx_vnni_int8 , AVX-VNNI-INT8 instructions
+ 0x7, 1, edx, 5, avx_ne_convert , AVX-NE-CONVERT instructions
+ 0x7, 1, edx, 8, amx_complex , AMX-COMPLEX instructions (starting from Granite Rapids)
+ 0x7, 1, edx, 14, prefetchit_0_1 , PREFETCHIT0/1 instructions
+ 0x7, 1, edx, 18, cet_sss , CET supervisor shadow stacks safe to use
+ 0x7, 2, edx, 0, intel_psfd , Intel predictive store forward disable
+ 0x7, 2, edx, 1, ipred_ctrl , MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S}
+ 0x7, 2, edx, 2, rrsba_ctrl , MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S}
+ 0x7, 2, edx, 3, ddp_ctrl , MSR bit IA32_SPEC_CTRL.DDPD_U
+ 0x7, 2, edx, 4, bhi_ctrl , MSR bit IA32_SPEC_CTRL.BHI_DIS_S
+ 0x7, 2, edx, 5, mcdt_no , MCDT mitigation not needed
+ 0x7, 2, edx, 6, uclock_disable , UC-lock disable is supported
# Leaf 9H
# Intel DCA (Direct Cache Access) enumeration
- 9, 0, eax, 0, dca_enabled_in_bios , DCA is enabled in BIOS
+ 0x9, 0, eax, 0, dca_enabled_in_bios , DCA is enabled in BIOS
# Leaf AH
# Intel PMU (Performance Monitoring Unit) enumeration
@@ -310,7 +311,7 @@
0xa, 0, eax, 7:0, pmu_version , Performance monitoring unit version ID
0xa, 0, eax, 15:8, pmu_n_gcounters , Number of general PMU counters per logical CPU
0xa, 0, eax, 23:16, pmu_gcounters_nbits , Bitwidth of PMU general counters
- 0xa, 0, eax, 31:24, pmu_cpuid_ebx_bits , Length of cpuid leaf 0xa EBX bit vector
+ 0xa, 0, eax, 31:24, pmu_cpuid_ebx_bits , Length of leaf 0xa EBX bit vector
0xa, 0, ebx, 0, no_core_cycle_evt , Core cycle event not available
0xa, 0, ebx, 1, no_insn_retired_evt , Instruction retired event not available
0xa, 0, ebx, 2, no_refcycle_evt , Reference cycles event not available
@@ -339,18 +340,18 @@
0xd, 0, eax, 0, xcr0_x87 , XCR0.X87 (bit 0) supported
0xd, 0, eax, 1, xcr0_sse , XCR0.SEE (bit 1) supported
0xd, 0, eax, 2, xcr0_avx , XCR0.AVX (bit 2) supported
- 0xd, 0, eax, 3, xcr0_mpx_bndregs , XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 regs)
- 0xd, 0, eax, 4, xcr0_mpx_bndcsr , XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs)
- 0xd, 0, eax, 5, xcr0_avx512_opmask , XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 regs)
- 0xd, 0, eax, 6, xcr0_avx512_zmm_hi256 , XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs)
- 0xd, 0, eax, 7, xcr0_avx512_hi16_zmm , XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs)
- 0xd, 0, eax, 9, xcr0_pkru , XCR0.PKRU (bit 9) supported (XSAVE PKRU reg)
- 0xd, 0, eax, 11, xcr0_cet_u , AMD XCR0.CET_U (bit 11) supported (CET supervisor state)
- 0xd, 0, eax, 12, xcr0_cet_s , AMD XCR0.CET_S (bit 12) support (CET user state)
+ 0xd, 0, eax, 3, xcr0_mpx_bndregs , XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 registers)
+ 0xd, 0, eax, 4, xcr0_mpx_bndcsr , XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS registers)
+ 0xd, 0, eax, 5, xcr0_avx512_opmask , XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 registers)
+ 0xd, 0, eax, 6, xcr0_avx512_zmm_hi256 , XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 registers)
+ 0xd, 0, eax, 7, xcr0_avx512_hi16_zmm , XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 registers)
+ 0xd, 0, eax, 9, xcr0_pkru , XCR0.PKRU (bit 9) supported (XSAVE PKRU registers)
+ 0xd, 0, eax, 11, xcr0_cet_u , XCR0.CET_U (bit 11) supported (CET user state)
+ 0xd, 0, eax, 12, xcr0_cet_s , XCR0.CET_S (bit 12) supported (CET supervisor state)
0xd, 0, eax, 17, xcr0_tileconfig , XCR0.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG)
0xd, 0, eax, 18, xcr0_tiledata , XCR0.TILEDATA (bit 18) supported (AMX can manage TILEDATA)
- 0xd, 0, ebx, 31:0, xsave_sz_xcr0_enabled , XSAVE/XRSTR area byte size, for XCR0 enabled features
- 0xd, 0, ecx, 31:0, xsave_sz_max , XSAVE/XRSTR area max byte size, all CPU features
+ 0xd, 0, ebx, 31:0, xsave_sz_xcr0_enabled , XSAVE/XRSTOR area byte size, for XCR0 enabled features
+ 0xd, 0, ecx, 31:0, xsave_sz_max , XSAVE/XRSTOR area max byte size, all CPU features
0xd, 0, edx, 30, xcr0_lwp , AMD XCR0.LWP (bit 62) supported (Light-weight Profiling)
0xd, 1, eax, 0, xsaveopt , XSAVEOPT instruction
0xd, 1, eax, 1, xsavec , XSAVEC instruction
@@ -369,7 +370,7 @@
0xd, 63:2, eax, 31:0, xsave_sz , Size of save area for subleaf-N feature, in bytes
0xd, 63:2, ebx, 31:0, xsave_offset , Offset of save area for subleaf-N feature, in bytes
0xd, 63:2, ecx, 0, is_xss_bit , Subleaf N describes an XSS bit, otherwise XCR0 bit
- 0xd, 63:2, ecx, 1, compacted_xsave_64byte_aligned, When compacted, subleaf-N feature xsave area is 64-byte aligned
+ 0xd, 63:2, ecx, 1, compacted_xsave_64byte_aligned, When compacted, subleaf-N feature XSAVE area is 64-byte aligned
# Leaf FH
# Intel RDT / AMD PQoS resource monitoring
@@ -426,17 +427,17 @@
0x12, 1, ecx, 0, xfrm_x87 , Enclave XFRM.X87 (bit 0) supported
0x12, 1, ecx, 1, xfrm_sse , Enclave XFRM.SEE (bit 1) supported
0x12, 1, ecx, 2, xfrm_avx , Enclave XFRM.AVX (bit 2) supported
- 0x12, 1, ecx, 3, xfrm_mpx_bndregs , Enclave XFRM.BNDREGS (bit 3) supported (MPX BND0-BND3 regs)
- 0x12, 1, ecx, 4, xfrm_mpx_bndcsr , Enclave XFRM.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs)
- 0x12, 1, ecx, 5, xfrm_avx512_opmask , Enclave XFRM.OPMASK (bit 5) supported (AVX-512 k0-k7 regs)
- 0x12, 1, ecx, 6, xfrm_avx512_zmm_hi256 , Enclave XFRM.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs)
- 0x12, 1, ecx, 7, xfrm_avx512_hi16_zmm , Enclave XFRM.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs)
- 0x12, 1, ecx, 9, xfrm_pkru , Enclave XFRM.PKRU (bit 9) supported (XSAVE PKRU reg)
+ 0x12, 1, ecx, 3, xfrm_mpx_bndregs , Enclave XFRM.BNDREGS (bit 3) supported (MPX BND0-BND3 registers)
+ 0x12, 1, ecx, 4, xfrm_mpx_bndcsr , Enclave XFRM.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS registers)
+ 0x12, 1, ecx, 5, xfrm_avx512_opmask , Enclave XFRM.OPMASK (bit 5) supported (AVX-512 k0-k7 registers)
+ 0x12, 1, ecx, 6, xfrm_avx512_zmm_hi256 , Enclave XFRM.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 registers)
+ 0x12, 1, ecx, 7, xfrm_avx512_hi16_zmm , Enclave XFRM.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 registers)
+ 0x12, 1, ecx, 9, xfrm_pkru , Enclave XFRM.PKRU (bit 9) supported (XSAVE PKRU registers)
0x12, 1, ecx, 17, xfrm_tileconfig , Enclave XFRM.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG)
0x12, 1, ecx, 18, xfrm_tiledata , Enclave XFRM.TILEDATA (bit 18) supported (AMX can manage TILEDATA)
0x12, 31:2, eax, 3:0, subleaf_type , Subleaf type (dictates output layout)
- 0x12, 31:2, eax, 31:12, epc_sec_base_addr_0 , EPC section base addr, bits[12:31]
- 0x12, 31:2, ebx, 19:0, epc_sec_base_addr_1 , EPC section base addr, bits[32:51]
+ 0x12, 31:2, eax, 31:12, epc_sec_base_addr_0 , EPC section base address, bits[12:31]
+ 0x12, 31:2, ebx, 19:0, epc_sec_base_addr_1 , EPC section base address, bits[32:51]
0x12, 31:2, ecx, 3:0, epc_sec_type , EPC section type / property encoding
0x12, 31:2, ecx, 31:12, epc_sec_size_0 , EPC section size, bits[12:31]
0x12, 31:2, edx, 19:0, epc_sec_size_1 , EPC section size, bits[32:51]
@@ -444,7 +445,7 @@
# Leaf 14H
# Intel Processor Trace enumeration
- 0x14, 0, eax, 31:0, pt_max_subleaf , Max cpuid 0x14 subleaf
+ 0x14, 0, eax, 31:0, pt_max_subleaf , Maximum leaf 0x14 subleaf
0x14, 0, ebx, 0, cr3_filtering , IA32_RTIT_CR3_MATCH is accessible
0x14, 0, ebx, 1, psb_cyc , Configurable PSB and cycle-accurate mode
0x14, 0, ebx, 2, ip_filtering , IP/TraceStop filtering; Warm-reset PT MSRs preservation
@@ -472,7 +473,7 @@
0x15, 0, ecx, 31:0, cpu_crystal_hz , Core crystal clock nominal frequency, in Hz
# Leaf 16H
-# Intel processor fequency enumeration
+# Intel processor frequency enumeration
0x16, 0, eax, 15:0, cpu_base_mhz , Processor base frequency, in MHz
0x16, 0, ebx, 15:0, cpu_max_mhz , Processor max frequency, in MHz
@@ -481,9 +482,9 @@
# Leaf 17H
# Intel SoC vendor attributes enumeration
- 0x17, 0, eax, 31:0, soc_max_subleaf , Max cpuid leaf 0x17 subleaf
+ 0x17, 0, eax, 31:0, soc_max_subleaf , Maximum leaf 0x17 subleaf
0x17, 0, ebx, 15:0, soc_vendor_id , SoC vendor ID
- 0x17, 0, ebx, 16, is_vendor_scheme , Assigned by industry enumaeratoion scheme (not Intel)
+ 0x17, 0, ebx, 16, is_vendor_scheme , Assigned by industry enumeration scheme (not Intel)
0x17, 0, ecx, 31:0, soc_proj_id , SoC project ID, assigned by vendor
0x17, 0, edx, 31:0, soc_stepping_id , Soc project stepping ID, assigned by vendor
0x17, 3:1, eax, 31:0, vendor_brand_a , Vendor Brand ID string, bytes subleaf_nr * (0 -> 3)
@@ -494,18 +495,18 @@
# Leaf 18H
# Intel determenestic address translation (TLB) parameters
- 0x18, 31:0, eax, 31:0, tlb_max_subleaf , Max cpuid 0x18 subleaf
+ 0x18, 31:0, eax, 31:0, tlb_max_subleaf , Maximum leaf 0x18 subleaf
0x18, 31:0, ebx, 0, tlb_4k_page , TLB 4KB-page entries supported
0x18, 31:0, ebx, 1, tlb_2m_page , TLB 2MB-page entries supported
0x18, 31:0, ebx, 2, tlb_4m_page , TLB 4MB-page entries supported
0x18, 31:0, ebx, 3, tlb_1g_page , TLB 1GB-page entries supported
- 0x18, 31:0, ebx, 10:8, hard_partitioning , (Hard/Soft) partitioning between logical CPUs sharing this struct
+ 0x18, 31:0, ebx, 10:8, hard_partitioning , (Hard/Soft) partitioning between logical CPUs sharing this structure
0x18, 31:0, ebx, 31:16, n_way_associative , Ways of associativity
0x18, 31:0, ecx, 31:0, n_sets , Number of sets
0x18, 31:0, edx, 4:0, tlb_type , Translation cache type (TLB type)
0x18, 31:0, edx, 7:5, tlb_cache_level , Translation cache level (1-based)
0x18, 31:0, edx, 8, is_fully_associative , Fully-associative structure
- 0x18, 31:0, edx, 25:14, tlb_max_addressible_ids, Max num of addressible IDs for logical CPUs sharing this TLB - 1
+ 0x18, 31:0, edx, 25:14, tlb_max_addressible_ids, Max number of addressable IDs for logical CPUs sharing this TLB - 1
# Leaf 19H
# Intel Key Locker enumeration
@@ -568,7 +569,7 @@
# Intel AMX, TMUL (Tile-matrix MULtiply) accelerator unit enumeration
0x1e, 0, ebx, 7:0, tmul_maxk , TMUL unit maximum height, K (rows or columns)
- 0x1e, 0, ebx, 23:8, tmul_maxn , TMUL unit maxiumum SIMD dimension, N (column bytes)
+ 0x1e, 0, ebx, 23:8, tmul_maxn , TMUL unit maximum SIMD dimension, N (column bytes)
# Leaf 1FH
# Intel extended topology enumeration v2
@@ -623,9 +624,9 @@
0x40000000, 0, edx, 31:0, hypervisor_id_2 , Hypervisor ID string bytes 8 - 11
# Leaf 80000000H
-# Maximum extended leaf number + CPU vendor string (AMD)
+# Maximum extended leaf number + AMD/Transmeta CPU vendor string
-0x80000000, 0, eax, 31:0, max_ext_leaf , Maximum extended cpuid leaf supported
+0x80000000, 0, eax, 31:0, max_ext_leaf , Maximum extended CPUID leaf supported
0x80000000, 0, ebx, 31:0, cpu_vendorid_0 , Vendor ID string bytes 0 - 3
0x80000000, 0, ecx, 31:0, cpu_vendorid_2 , Vendor ID string bytes 8 - 11
0x80000000, 0, edx, 31:0, cpu_vendorid_1 , Vendor ID string bytes 4 - 7
@@ -636,6 +637,7 @@
0x80000001, 0, eax, 3:0, e_stepping_id , Stepping ID
0x80000001, 0, eax, 7:4, e_base_model , Base processor model
0x80000001, 0, eax, 11:8, e_base_family , Base processor family
+0x80000001, 0, eax, 13:12, e_base_type , Base processor type (Transmeta)
0x80000001, 0, eax, 19:16, e_ext_model , Extended processor model
0x80000001, 0, eax, 27:20, e_ext_family , Extended processor family
0x80000001, 0, ebx, 15:0, brand_id , Brand ID
@@ -659,7 +661,7 @@
0x80000001, 0, ecx, 17, tce , Translation cache extension
0x80000001, 0, ecx, 19, nodeid_msr , NodeId MSR (0xc001100c)
0x80000001, 0, ecx, 21, tbm , Trailing bit manipulations
-0x80000001, 0, ecx, 22, topoext , Topology Extensions (cpuid leaf 0x8000001d)
+0x80000001, 0, ecx, 22, topoext , Topology Extensions (leaf 0x8000001d)
0x80000001, 0, ecx, 23, perfctr_core , Core performance counter extensions
0x80000001, 0, ecx, 24, perfctr_nb , NB/DF performance counter extensions
0x80000001, 0, ecx, 26, bpext , Data access breakpoint extension
@@ -687,6 +689,7 @@
0x80000001, 0, edx, 19, mp , Out-of-spec AMD Multiprocessing bit
0x80000001, 0, edx, 20, nx , No-execute page protection
0x80000001, 0, edx, 22, mmxext , AMD MMX extensions
+0x80000001, 0, edx, 23, e_mmx , MMX instructions
0x80000001, 0, edx, 24, e_fxsr , FXSAVE and FXRSTOR instructions
0x80000001, 0, edx, 25, fxsr_opt , FXSAVE and FXRSTOR optimizations
0x80000001, 0, edx, 26, pdpe1gb , 1-GB large page support
@@ -720,11 +723,11 @@
0x80000004, 0, edx, 31:0, cpu_brandid_11 , CPU brand ID string, bytes 44 - 47
# Leaf 80000005H
-# AMD L1 cache and L1 TLB enumeration
+# AMD/Transmeta L1 cache and L1 TLB enumeration
-0x80000005, 0, eax, 7:0, l1_itlb_2m_4m_nentries , L1 ITLB #entires, 2M and 4M pages
+0x80000005, 0, eax, 7:0, l1_itlb_2m_4m_nentries , L1 ITLB #entries, 2M and 4M pages
0x80000005, 0, eax, 15:8, l1_itlb_2m_4m_assoc , L1 ITLB associativity, 2M and 4M pages
-0x80000005, 0, eax, 23:16, l1_dtlb_2m_4m_nentries , L1 DTLB #entires, 2M and 4M pages
+0x80000005, 0, eax, 23:16, l1_dtlb_2m_4m_nentries , L1 DTLB #entries, 2M and 4M pages
0x80000005, 0, eax, 31:24, l1_dtlb_2m_4m_assoc , L1 DTLB associativity, 2M and 4M pages
0x80000005, 0, ebx, 7:0, l1_itlb_4k_nentries , L1 ITLB #entries, 4K pages
0x80000005, 0, ebx, 15:8, l1_itlb_4k_assoc , L1 ITLB associativity, 4K pages
@@ -763,11 +766,11 @@
# CPU power management (mostly AMD) and AMD RAS enumeration
0x80000007, 0, ebx, 0, overflow_recov , MCA overflow conditions not fatal
-0x80000007, 0, ebx, 1, succor , Software containment of UnCORRectable errors
+0x80000007, 0, ebx, 1, succor , Software containment of uncorrectable errors
0x80000007, 0, ebx, 2, hw_assert , Hardware assert MSRs
0x80000007, 0, ebx, 3, smca , Scalable MCA (MCAX MSRs)
0x80000007, 0, ecx, 31:0, cpu_pwr_sample_ratio , CPU power sample time ratio
-0x80000007, 0, edx, 0, digital_temp , Digital temprature sensor
+0x80000007, 0, edx, 0, digital_temp , Digital temperature sensor
0x80000007, 0, edx, 1, powernow_freq_id , PowerNOW! frequency scaling
0x80000007, 0, edx, 2, powernow_volt_id , PowerNOW! voltage scaling
0x80000007, 0, edx, 3, thermal_trip , THERMTRIP (Thermal Trip)
@@ -810,7 +813,7 @@
0x80000008, 0, ebx, 23, amd_ppin , Protected Processor Inventory Number
0x80000008, 0, ebx, 24, amd_ssbd , Speculative Store Bypass Disable
0x80000008, 0, ebx, 25, virt_ssbd , virtualized SSBD (Speculative Store Bypass Disable)
-0x80000008, 0, ebx, 26, amd_ssb_no , SSBD not needed (fixed in HW)
+0x80000008, 0, ebx, 26, amd_ssb_no , SSBD is not needed (fixed in hardware)
0x80000008, 0, ebx, 27, cppc , Collaborative Processor Performance Control
0x80000008, 0, ebx, 28, amd_psfd , Predictive Store Forward Disable
0x80000008, 0, ebx, 29, btc_no , CPU not affected by Branch Type Confusion
@@ -838,7 +841,7 @@
0x8000000a, 0, edx, 10, pausefilter , Pause intercept filter
0x8000000a, 0, edx, 12, pfthreshold , Pause filter threshold
0x8000000a, 0, edx, 13, avic , Advanced virtual interrupt controller
-0x8000000a, 0, edx, 15, v_vmsave_vmload , Virtual VMSAVE/VMLOAD (nested virt)
+0x8000000a, 0, edx, 15, v_vmsave_vmload , Virtual VMSAVE/VMLOAD (nested virtualization)
0x8000000a, 0, edx, 16, vgif , Virtualize the Global Interrupt Flag
0x8000000a, 0, edx, 17, gmet , Guest mode execution trap
0x8000000a, 0, edx, 18, x2avic , Virtual x2APIC
@@ -850,7 +853,7 @@
0x8000000a, 0, edx, 25, vnmi , NMI virtualization
0x8000000a, 0, edx, 26, ibs_virt , IBS Virtualization
0x8000000a, 0, edx, 27, ext_lvt_off_chg , Extended LVT offset fault change
-0x8000000a, 0, edx, 28, svme_addr_chk , Guest SVME addr check
+0x8000000a, 0, edx, 28, svme_addr_chk , Guest SVME address check
# Leaf 80000019H
# AMD TLB 1G-pages enumeration
@@ -891,20 +894,20 @@
# AMD LWP (Lightweight Profiling)
0x8000001c, 0, eax, 0, os_lwp_avail , LWP is available to application programs (supported by OS)
-0x8000001c, 0, eax, 1, os_lpwval , LWPVAL instruction (EventId=1) is supported by OS
-0x8000001c, 0, eax, 2, os_lwp_ire , Instructions Retired Event (EventId=2) is supported by OS
-0x8000001c, 0, eax, 3, os_lwp_bre , Branch Retired Event (EventId=3) is supported by OS
-0x8000001c, 0, eax, 4, os_lwp_dme , DCache Miss Event (EventId=4) is supported by OS
-0x8000001c, 0, eax, 5, os_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is supported by OS
-0x8000001c, 0, eax, 6, os_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is supported by OS
+0x8000001c, 0, eax, 1, os_lpwval , LWPVAL instruction is supported by OS
+0x8000001c, 0, eax, 2, os_lwp_ire , Instructions Retired Event is supported by OS
+0x8000001c, 0, eax, 3, os_lwp_bre , Branch Retired Event is supported by OS
+0x8000001c, 0, eax, 4, os_lwp_dme , Dcache Miss Event is supported by OS
+0x8000001c, 0, eax, 5, os_lwp_cnh , CPU Clocks Not Halted event is supported by OS
+0x8000001c, 0, eax, 6, os_lwp_rnh , CPU Reference clocks Not Halted event is supported by OS
0x8000001c, 0, eax, 29, os_lwp_cont , LWP sampling in continuous mode is supported by OS
0x8000001c, 0, eax, 30, os_lwp_ptsc , Performance Time Stamp Counter in event records is supported by OS
0x8000001c, 0, eax, 31, os_lwp_int , Interrupt on threshold overflow is supported by OS
0x8000001c, 0, ebx, 7:0, lwp_lwpcb_sz , LWP Control Block size, in quadwords
0x8000001c, 0, ebx, 15:8, lwp_event_sz , LWP event record size, in bytes
-0x8000001c, 0, ebx, 23:16, lwp_max_events , LWP max supported EventId value (EventID 255 not included)
+0x8000001c, 0, ebx, 23:16, lwp_max_events , LWP max supported EventID value (EventID 255 not included)
0x8000001c, 0, ebx, 31:24, lwp_event_offset , LWP events area offset in the LWP Control Block
-0x8000001c, 0, ecx, 4:0, lwp_latency_max , Num of bits in cache latency counters (10 to 31)
+0x8000001c, 0, ecx, 4:0, lwp_latency_max , Number of bits in cache latency counters (10 to 31)
0x8000001c, 0, ecx, 5, lwp_data_adddr , Cache miss events report the data address of the reference
0x8000001c, 0, ecx, 8:6, lwp_latency_rnd , Amount by which cache latency is rounded
0x8000001c, 0, ecx, 15:9, lwp_version , LWP implementation version
@@ -913,16 +916,16 @@
0x8000001c, 0, ecx, 29, lwp_ip_filtering , IP filtering (IPI, IPF, BaseIP, and LimitIP @ LWPCP) supported
0x8000001c, 0, ecx, 30, lwp_cache_levels , Cache-related events can be filtered by cache level
0x8000001c, 0, ecx, 31, lwp_cache_latency , Cache-related events can be filtered by latency
-0x8000001c, 0, edx, 0, hw_lwp_avail , LWP is available in Hardware
-0x8000001c, 0, edx, 1, hw_lpwval , LWPVAL instruction (EventId=1) is available in HW
-0x8000001c, 0, edx, 2, hw_lwp_ire , Instructions Retired Event (EventId=2) is available in HW
-0x8000001c, 0, edx, 3, hw_lwp_bre , Branch Retired Event (EventId=3) is available in HW
-0x8000001c, 0, edx, 4, hw_lwp_dme , DCache Miss Event (EventId=4) is available in HW
-0x8000001c, 0, edx, 5, hw_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is available in HW
-0x8000001c, 0, edx, 6, hw_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is available in HW
-0x8000001c, 0, edx, 29, hw_lwp_cont , LWP sampling in continuous mode is available in HW
-0x8000001c, 0, edx, 30, hw_lwp_ptsc , Performance Time Stamp Counter in event records is available in HW
-0x8000001c, 0, edx, 31, hw_lwp_int , Interrupt on threshold overflow is available in HW
+0x8000001c, 0, edx, 0, hw_lwp_avail , LWP is available in hardware
+0x8000001c, 0, edx, 1, hw_lpwval , LWPVAL instruction is available in hardware
+0x8000001c, 0, edx, 2, hw_lwp_ire , Instructions Retired Event is available in hardware
+0x8000001c, 0, edx, 3, hw_lwp_bre , Branch Retired Event is available in hardware
+0x8000001c, 0, edx, 4, hw_lwp_dme , Dcache Miss Event is available in hardware
+0x8000001c, 0, edx, 5, hw_lwp_cnh , Clocks Not Halted event is available in hardware
+0x8000001c, 0, edx, 6, hw_lwp_rnh , Reference clocks Not Halted event is available in hardware
+0x8000001c, 0, edx, 29, hw_lwp_cont , LWP sampling in continuous mode is available in hardware
+0x8000001c, 0, edx, 30, hw_lwp_ptsc , Performance Time Stamp Counter in event records is available in hardware
+0x8000001c, 0, edx, 31, hw_lwp_int , Interrupt on threshold overflow is available in hardware
# Leaf 8000001DH
# AMD deterministic cache parameters
@@ -958,10 +961,10 @@
0x8000001f, 0, eax, 4, sev_nested_paging , SEV secure nested paging supported
0x8000001f, 0, eax, 5, vm_permission_levels , VMPL supported
0x8000001f, 0, eax, 6, rpmquery , RPMQUERY instruction supported
-0x8000001f, 0, eax, 7, vmpl_sss , VMPL supervisor shadwo stack supported
+0x8000001f, 0, eax, 7, vmpl_sss , VMPL supervisor shadow stack supported
0x8000001f, 0, eax, 8, secure_tsc , Secure TSC supported
0x8000001f, 0, eax, 9, v_tsc_aux , Hardware virtualizes TSC_AUX
-0x8000001f, 0, eax, 10, sme_coherent , HW enforces cache coherency across encryption domains
+0x8000001f, 0, eax, 10, sme_coherent , Cache coherency is enforced across encryption domains
0x8000001f, 0, eax, 11, req_64bit_hypervisor , SEV guest mandates 64-bit hypervisor
0x8000001f, 0, eax, 12, restricted_injection , Restricted Injection supported
0x8000001f, 0, eax, 13, alternate_injection , Alternate Injection supported
@@ -973,13 +976,13 @@
0x8000001f, 0, eax, 19, virt_ibs , IBS state virtualization is supported for SEV-ES guests
0x8000001f, 0, eax, 24, vmsa_reg_protection , VMSA register protection is supported
0x8000001f, 0, eax, 25, smt_protection , SMT protection is supported
-0x8000001f, 0, eax, 28, svsm_page_msr , SVSM communication page MSR (0xc001f000h) is supported
+0x8000001f, 0, eax, 28, svsm_page_msr , SVSM communication page MSR (0xc001f000) is supported
0x8000001f, 0, eax, 29, nested_virt_snp_msr , VIRT_RMPUPDATE/VIRT_PSMASH MSRs are supported
0x8000001f, 0, ebx, 5:0, pte_cbit_pos , PTE bit number used to enable memory encryption
0x8000001f, 0, ebx, 11:6, phys_addr_reduction_nbits, Reduction of phys address space when encryption is enabled, in bits
0x8000001f, 0, ebx, 15:12, vmpl_count , Number of VM permission levels (VMPL) supported
0x8000001f, 0, ecx, 31:0, enc_guests_max , Max supported number of simultaneous encrypted guests
-0x8000001f, 0, edx, 31:0, min_sev_asid_no_sev_es , Mininum ASID for SEV-enabled SEV-ES-disabled guest
+0x8000001f, 0, edx, 31:0, min_sev_asid_no_sev_es , Minimum ASID for SEV-enabled SEV-ES-disabled guest
# Leaf 80000020H
# AMD Platform QoS extended feature IDs
@@ -988,6 +991,8 @@
0x80000020, 0, ebx, 2, smba , Slow Memory Bandwidth Allocation support
0x80000020, 0, ebx, 3, bmec , Bandwidth Monitoring Event Configuration support
0x80000020, 0, ebx, 4, l3rr , L3 Range Reservation support
+0x80000020, 0, ebx, 5, abmc , Assignable Bandwidth Monitoring Counters
+0x80000020, 0, ebx, 6, sdciae , Smart Data Cache Injection (SDCI) Allocation Enforcement
0x80000020, 1, eax, 31:0, mba_limit_len , MBA enforcement limit size
0x80000020, 1, edx, 31:0, mba_cos_max , MBA max Class of Service number (zero-based)
0x80000020, 2, eax, 31:0, smba_limit_len , SMBA enforcement limit size
@@ -1007,17 +1012,26 @@
0x80000021, 0, eax, 0, no_nested_data_bp , No nested data breakpoints
0x80000021, 0, eax, 1, fsgs_non_serializing , WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing
0x80000021, 0, eax, 2, lfence_rdtsc , LFENCE always serializing / synchronizes RDTSC
-0x80000021, 0, eax, 3, smm_page_cfg_lock , SMM paging configuration lock is supported
+0x80000021, 0, eax, 3, smm_page_cfg_lock , SMM paging configuration lock
0x80000021, 0, eax, 6, null_sel_clr_base , Null selector clears base
-0x80000021, 0, eax, 7, upper_addr_ignore , EFER MSR Upper Address Ignore Enable bit supported
-0x80000021, 0, eax, 8, autoibrs , EFER MSR Automatic IBRS enable bit supported
-0x80000021, 0, eax, 9, no_smm_ctl_msr , SMM_CTL MSR (0xc0010116) is not present
-0x80000021, 0, eax, 10, fsrs_supported , Fast Short Rep Stosb (FSRS) is supported
-0x80000021, 0, eax, 11, fsrc_supported , Fast Short Repe Cmpsb (FSRC) is supported
-0x80000021, 0, eax, 13, prefetch_ctl_msr , Prefetch control MSR is supported
+0x80000021, 0, eax, 7, upper_addr_ignore , EFER MSR Upper Address Ignore
+0x80000021, 0, eax, 8, autoibrs , EFER MSR Automatic IBRS
+0x80000021, 0, eax, 9, no_smm_ctl_msr , SMM_CTL MSR (0xc0010116) is not available
+0x80000021, 0, eax, 10, fsrs , Fast Short Rep STOSB
+0x80000021, 0, eax, 11, fsrc , Fast Short Rep CMPSB
+0x80000021, 0, eax, 13, prefetch_ctl_msr , Prefetch control MSR is available
+0x80000021, 0, eax, 16, opcode_reclaim , Reserves opcode space
0x80000021, 0, eax, 17, user_cpuid_disable , #GP when executing CPUID at CPL > 0 is supported
-0x80000021, 0, eax, 18, epsf_supported , Enhanced Predictive Store Forwarding (EPSF) is supported
-0x80000021, 0, ebx, 11:0, microcode_patch_size , Size of microcode patch, in 16-byte units
+0x80000021, 0, eax, 18, epsf , Enhanced Predictive Store Forwarding
+0x80000021, 0, eax, 22, wl_feedback , Workload-based heuristic feedback to OS
+0x80000021, 0, eax, 24, eraps , Enhanced Return Address Predictor Security
+0x80000021, 0, eax, 27, sbpb , Selective Branch Predictor Barrier
+0x80000021, 0, eax, 28, ibpb_brtype , Branch predictions flushed from CPU branch predictor
+0x80000021, 0, eax, 29, srso_no , CPU is not subject to the SRSO vulnerability
+0x80000021, 0, eax, 30, srso_uk_no , CPU is not vulnerable to SRSO at user-kernel boundary
+0x80000021, 0, eax, 31, srso_msr_fix , Software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO
+0x80000021, 0, ebx, 15:0, microcode_patch_size , Size of microcode patch, in 16-byte units
+0x80000021, 0, ebx, 23:16, rap_size , Return Address Predictor size
# Leaf 80000022H
# AMD Performance Monitoring v2 enumeration
@@ -1025,7 +1039,7 @@
0x80000022, 0, eax, 0, perfmon_v2 , Performance monitoring v2 supported
0x80000022, 0, eax, 1, lbr_v2 , Last Branch Record v2 extensions (LBR Stack)
0x80000022, 0, eax, 2, lbr_pmc_freeze , Freezing core performance counters / LBR Stack supported
-0x80000022, 0, ebx, 3:0, n_pmc_core , Number of core perfomance counters
+0x80000022, 0, ebx, 3:0, n_pmc_core , Number of core performance counters
0x80000022, 0, ebx, 9:4, lbr_v2_stack_size , Number of available LBR stack entries
0x80000022, 0, ebx, 15:10, n_pmc_northbridge , Number of available northbridge (data fabric) performance counters
0x80000022, 0, ebx, 21:16, n_pmc_umc , Number of available UMC performance counters
@@ -1035,7 +1049,7 @@
# AMD Secure Multi-key Encryption enumeration
0x80000023, 0, eax, 0, mem_hmk_mode , MEM-HMK encryption mode is supported
-0x80000023, 0, ebx, 15:0, mem_hmk_avail_keys , MEM-HMK mode: total num of available encryption keys
+0x80000023, 0, ebx, 15:0, mem_hmk_avail_keys , MEM-HMK mode: total number of available encryption keys
# Leaf 80000026H
# AMD extended topology enumeration v2
@@ -1051,3 +1065,108 @@
0x80000026, 3:0, ecx, 7:0, domain_level , This domain level (subleaf ID)
0x80000026, 3:0, ecx, 15:8, domain_type , This domain type
0x80000026, 3:0, edx, 31:0, x2apic_id , x2APIC ID of current logical CPU
+
+# Leaf 80860000H
+# Maximum Transmeta leaf number + CPU vendor ID string
+
+0x80860000, 0, eax, 31:0, max_tra_leaf , Maximum supported Transmeta leaf number
+0x80860000, 0, ebx, 31:0, cpu_vendorid_0 , Transmeta Vendor ID string bytes 0 - 3
+0x80860000, 0, ecx, 31:0, cpu_vendorid_2 , Transmeta Vendor ID string bytes 8 - 11
+0x80860000, 0, edx, 31:0, cpu_vendorid_1 , Transmeta Vendor ID string bytes 4 - 7
+
+# Leaf 80860001H
+# Transmeta extended CPU information
+
+0x80860001, 0, eax, 3:0, stepping , Stepping ID
+0x80860001, 0, eax, 7:4, base_model , Base CPU model ID
+0x80860001, 0, eax, 11:8, base_family_id , Base CPU family ID
+0x80860001, 0, eax, 13:12, cpu_type , CPU type
+0x80860001, 0, ebx, 7:0, cpu_rev_mask_minor , CPU revision ID, mask minor
+0x80860001, 0, ebx, 15:8, cpu_rev_mask_major , CPU revision ID, mask major
+0x80860001, 0, ebx, 23:16, cpu_rev_minor , CPU revision ID, minor
+0x80860001, 0, ebx, 31:24, cpu_rev_major , CPU revision ID, major
+0x80860001, 0, ecx, 31:0, cpu_base_mhz , CPU nominal frequency, in MHz
+0x80860001, 0, edx, 0, recovery , Recovery CMS is active (after bad flush)
+0x80860001, 0, edx, 1, longrun , LongRun power management capabilities
+0x80860001, 0, edx, 3, lrti , LongRun Table Interface
+
+# Leaf 80860002H
+# Transmeta Code Morphing Software (CMS) enumeration
+
+0x80860002, 0, eax, 31:0, cpu_rev_id , CPU revision ID
+0x80860002, 0, ebx, 7:0, cms_rev_mask_2 , CMS revision ID, mask component 2
+0x80860002, 0, ebx, 15:8, cms_rev_mask_1 , CMS revision ID, mask component 1
+0x80860002, 0, ebx, 23:16, cms_rev_minor , CMS revision ID, minor
+0x80860002, 0, ebx, 31:24, cms_rev_major , CMS revision ID, major
+0x80860002, 0, ecx, 31:0, cms_rev_mask_3 , CMS revision ID, mask component 3
+
+# Leaf 80860003H
+# Transmeta CPU information string, bytes 0 - 15
+
+0x80860003, 0, eax, 31:0, cpu_info_0 , CPU info string bytes 0 - 3
+0x80860003, 0, ebx, 31:0, cpu_info_1 , CPU info string bytes 4 - 7
+0x80860003, 0, ecx, 31:0, cpu_info_2 , CPU info string bytes 8 - 11
+0x80860003, 0, edx, 31:0, cpu_info_3 , CPU info string bytes 12 - 15
+
+# Leaf 80860004H
+# Transmeta CPU information string, bytes 16 - 31
+
+0x80860004, 0, eax, 31:0, cpu_info_4 , CPU info string bytes 16 - 19
+0x80860004, 0, ebx, 31:0, cpu_info_5 , CPU info string bytes 20 - 23
+0x80860004, 0, ecx, 31:0, cpu_info_6 , CPU info string bytes 24 - 27
+0x80860004, 0, edx, 31:0, cpu_info_7 , CPU info string bytes 28 - 31
+
+# Leaf 80860005H
+# Transmeta CPU information string, bytes 32 - 47
+
+0x80860005, 0, eax, 31:0, cpu_info_8 , CPU info string bytes 32 - 35
+0x80860005, 0, ebx, 31:0, cpu_info_9 , CPU info string bytes 36 - 39
+0x80860005, 0, ecx, 31:0, cpu_info_10 , CPU info string bytes 40 - 43
+0x80860005, 0, edx, 31:0, cpu_info_11 , CPU info string bytes 44 - 47
+
+# Leaf 80860006H
+# Transmeta CPU information string, bytes 48 - 63
+
+0x80860006, 0, eax, 31:0, cpu_info_12 , CPU info string bytes 48 - 51
+0x80860006, 0, ebx, 31:0, cpu_info_13 , CPU info string bytes 52 - 55
+0x80860006, 0, ecx, 31:0, cpu_info_14 , CPU info string bytes 56 - 59
+0x80860006, 0, edx, 31:0, cpu_info_15 , CPU info string bytes 60 - 63
+
+# Leaf 80860007H
+# Transmeta live CPU information
+
+0x80860007, 0, eax, 31:0, cpu_cur_mhz , Current CPU frequency, in MHz
+0x80860007, 0, ebx, 31:0, cpu_cur_voltage , Current CPU voltage, in millivolts
+0x80860007, 0, ecx, 31:0, cpu_cur_perf_pctg , Current CPU performance percentage, 0 - 100
+0x80860007, 0, edx, 31:0, cpu_cur_gate_delay , Current CPU gate delay, in femtoseconds
+
+# Leaf C0000000H
+# Maximum Centaur/Zhaoxin leaf number
+
+0xc0000000, 0, eax, 31:0, max_cntr_leaf , Maximum Centaur/Zhaoxin leaf number
+
+# Leaf C0000001H
+# Centaur/Zhaoxin extended CPU features
+
+0xc0000001, 0, edx, 0, ccs_sm2 , CCS SM2 instructions
+0xc0000001, 0, edx, 1, ccs_sm2_en , CCS SM2 enabled
+0xc0000001, 0, edx, 2, xstore , Random Number Generator
+0xc0000001, 0, edx, 3, xstore_en , RNG enabled
+0xc0000001, 0, edx, 4, ccs_sm3_sm4 , CCS SM3 and SM4 instructions
+0xc0000001, 0, edx, 5, ccs_sm3_sm4_en , CCS SM3/SM4 enabled
+0xc0000001, 0, edx, 6, ace , Advanced Cryptography Engine
+0xc0000001, 0, edx, 7, ace_en , ACE enabled
+0xc0000001, 0, edx, 8, ace2 , Advanced Cryptography Engine v2
+0xc0000001, 0, edx, 9, ace2_en , ACE v2 enabled
+0xc0000001, 0, edx, 10, phe , PadLock Hash Engine
+0xc0000001, 0, edx, 11, phe_en , PHE enabled
+0xc0000001, 0, edx, 12, pmm , PadLock Montgomery Multiplier
+0xc0000001, 0, edx, 13, pmm_en , PMM enabled
+0xc0000001, 0, edx, 16, parallax , Parallax auto adjust processor voltage
+0xc0000001, 0, edx, 17, parallax_en , Parallax enabled
+0xc0000001, 0, edx, 20, tm3 , Thermal Monitor v3
+0xc0000001, 0, edx, 21, tm3_en , TM v3 enabled
+0xc0000001, 0, edx, 25, phe2 , PadLock Hash Engine v2 (SHA384/SHA512)
+0xc0000001, 0, edx, 26, phe2_en , PHE v2 enabled
+0xc0000001, 0, edx, 27, rsa , RSA instructions (XMODEXP/MONTMUL2)
+0xc0000001, 0, edx, 28, rsa_en , RSA instructions enabled
diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c
index 1b25c0a95d3f..7dc6b9235d02 100644
--- a/tools/arch/x86/kcpuid/kcpuid.c
+++ b/tools/arch/x86/kcpuid/kcpuid.c
@@ -1,14 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
-#include <stdio.h>
+#include <cpuid.h>
+#include <err.h>
+#include <getopt.h>
#include <stdbool.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <getopt.h>
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define min(a, b) (((a) < (b)) ? (a) : (b))
+#define __noreturn __attribute__((__noreturn__))
typedef unsigned int u32;
typedef unsigned long long u64;
@@ -49,7 +52,7 @@ static const char * const reg_names[] = {
struct subleaf {
u32 index;
u32 sub;
- u32 eax, ebx, ecx, edx;
+ u32 output[NR_REGS];
struct reg_desc info[NR_REGS];
};
@@ -63,21 +66,64 @@ struct cpuid_func {
int nr;
};
+enum range_index {
+ RANGE_STD = 0, /* Standard */
+ RANGE_EXT = 0x80000000, /* Extended */
+ RANGE_TSM = 0x80860000, /* Transmeta */
+ RANGE_CTR = 0xc0000000, /* Centaur/Zhaoxin */
+};
+
+#define CPUID_INDEX_MASK 0xffff0000
+#define CPUID_FUNCTION_MASK (~CPUID_INDEX_MASK)
+
struct cpuid_range {
/* array of main leafs */
struct cpuid_func *funcs;
/* number of valid leafs */
int nr;
- bool is_ext;
+ enum range_index index;
};
-/*
- * basic: basic functions range: [0... ]
- * ext: extended functions range: [0x80000000... ]
- */
-struct cpuid_range *leafs_basic, *leafs_ext;
+static struct cpuid_range ranges[] = {
+ { .index = RANGE_STD, },
+ { .index = RANGE_EXT, },
+ { .index = RANGE_TSM, },
+ { .index = RANGE_CTR, },
+};
+
+static char *range_to_str(struct cpuid_range *range)
+{
+ switch (range->index) {
+ case RANGE_STD: return "Standard";
+ case RANGE_EXT: return "Extended";
+ case RANGE_TSM: return "Transmeta";
+ case RANGE_CTR: return "Centaur";
+ default: return NULL;
+ }
+}
+
+#define __for_each_cpuid_range(range, __condition) \
+ for (unsigned int i = 0; \
+ i < ARRAY_SIZE(ranges) && ((range) = &ranges[i]) && (__condition); \
+ i++)
+
+#define for_each_valid_cpuid_range(range) __for_each_cpuid_range(range, (range)->nr != 0)
+#define for_each_cpuid_range(range) __for_each_cpuid_range(range, true)
+
+struct cpuid_range *index_to_cpuid_range(u32 index)
+{
+ u32 func_idx = index & CPUID_FUNCTION_MASK;
+ u32 range_idx = index & CPUID_INDEX_MASK;
+ struct cpuid_range *range;
+
+ for_each_valid_cpuid_range(range) {
+ if (range->index == range_idx && (u32)range->nr > func_idx)
+ return range;
+ }
+
+ return NULL;
+}
-static bool is_amd;
static bool show_details;
static bool show_raw;
static bool show_flags_only = true;
@@ -85,16 +131,16 @@ static u32 user_index = 0xFFFFFFFF;
static u32 user_sub = 0xFFFFFFFF;
static int flines;
-static inline void cpuid(u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
+/*
+ * Force using <cpuid.h> __cpuid_count() instead of __cpuid(). The
+ * latter leaves ECX uninitialized, which can break CPUID queries.
+ */
+
+#define cpuid(leaf, a, b, c, d) \
+ __cpuid_count(leaf, 0, a, b, c, d)
+
+#define cpuid_count(leaf, subleaf, a, b, c, d) \
+ __cpuid_count(leaf, subleaf, a, b, c, d)
static inline bool has_subleafs(u32 f)
{
@@ -117,11 +163,11 @@ static void leaf_print_raw(struct subleaf *leaf)
if (leaf->sub == 0)
printf("0x%08x: subleafs:\n", leaf->index);
- printf(" %2d: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n",
- leaf->sub, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx);
+ printf(" %2d: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", leaf->sub,
+ leaf->output[0], leaf->output[1], leaf->output[2], leaf->output[3]);
} else {
- printf("0x%08x: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n",
- leaf->index, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx);
+ printf("0x%08x: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", leaf->index,
+ leaf->output[0], leaf->output[1], leaf->output[2], leaf->output[3]);
}
}
@@ -140,19 +186,19 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf,
* Cut off vendor-prefix from CPUID function as we're using it as an
* index into ->funcs.
*/
- func = &range->funcs[f & 0xffff];
+ func = &range->funcs[f & CPUID_FUNCTION_MASK];
if (!func->leafs) {
func->leafs = malloc(sizeof(struct subleaf));
if (!func->leafs)
- perror("malloc func leaf");
+ err(EXIT_FAILURE, NULL);
func->nr = 1;
} else {
s = func->nr;
func->leafs = realloc(func->leafs, (s + 1) * sizeof(*leaf));
if (!func->leafs)
- perror("realloc f->leafs");
+ err(EXIT_FAILURE, NULL);
func->nr++;
}
@@ -161,84 +207,73 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf,
leaf->index = f;
leaf->sub = subleaf;
- leaf->eax = a;
- leaf->ebx = b;
- leaf->ecx = c;
- leaf->edx = d;
+ leaf->output[R_EAX] = a;
+ leaf->output[R_EBX] = b;
+ leaf->output[R_ECX] = c;
+ leaf->output[R_EDX] = d;
return false;
}
static void raw_dump_range(struct cpuid_range *range)
{
- u32 f;
- int i;
-
- printf("%s Leafs :\n", range->is_ext ? "Extended" : "Basic");
+ printf("%s Leafs :\n", range_to_str(range));
printf("================\n");
- for (f = 0; (int)f < range->nr; f++) {
+ for (u32 f = 0; (int)f < range->nr; f++) {
struct cpuid_func *func = &range->funcs[f];
- u32 index = f;
-
- if (range->is_ext)
- index += 0x80000000;
/* Skip leaf without valid items */
if (!func->nr)
continue;
/* First item is the main leaf, followed by all subleafs */
- for (i = 0; i < func->nr; i++)
+ for (int i = 0; i < func->nr; i++)
leaf_print_raw(&func->leafs[i]);
}
}
#define MAX_SUBLEAF_NUM 64
-struct cpuid_range *setup_cpuid_range(u32 input_eax)
+#define MAX_RANGE_INDEX_OFFSET 0xff
+void setup_cpuid_range(struct cpuid_range *range)
{
- u32 max_func, idx_func, subleaf, max_subleaf;
- u32 eax, ebx, ecx, edx, f = input_eax;
- struct cpuid_range *range;
- bool allzero;
-
- eax = input_eax;
- ebx = ecx = edx = 0;
+ u32 max_func, range_funcs_sz;
+ u32 eax, ebx, ecx, edx;
- cpuid(&eax, &ebx, &ecx, &edx);
- max_func = eax;
- idx_func = (max_func & 0xffff) + 1;
+ cpuid(range->index, max_func, ebx, ecx, edx);
- range = malloc(sizeof(struct cpuid_range));
- if (!range)
- perror("malloc range");
+ /*
+ * If the CPUID range's maximum function value is garbage, then it
+ * is not recognized by this CPU. Set the range's number of valid
+ * leaves to zero so that for_each_valid_cpu_range() can ignore it.
+ */
+ if (max_func < range->index || max_func > (range->index + MAX_RANGE_INDEX_OFFSET)) {
+ range->nr = 0;
+ return;
+ }
- if (input_eax & 0x80000000)
- range->is_ext = true;
- else
- range->is_ext = false;
+ range->nr = (max_func & CPUID_FUNCTION_MASK) + 1;
+ range_funcs_sz = range->nr * sizeof(struct cpuid_func);
- range->funcs = malloc(sizeof(struct cpuid_func) * idx_func);
+ range->funcs = malloc(range_funcs_sz);
if (!range->funcs)
- perror("malloc range->funcs");
+ err(EXIT_FAILURE, NULL);
- range->nr = idx_func;
- memset(range->funcs, 0, sizeof(struct cpuid_func) * idx_func);
+ memset(range->funcs, 0, range_funcs_sz);
- for (; f <= max_func; f++) {
- eax = f;
- subleaf = ecx = 0;
+ for (u32 f = range->index; f <= max_func; f++) {
+ u32 max_subleaf = MAX_SUBLEAF_NUM;
+ bool allzero;
- cpuid(&eax, &ebx, &ecx, &edx);
- allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx);
+ cpuid(f, eax, ebx, ecx, edx);
+
+ allzero = cpuid_store(range, f, 0, eax, ebx, ecx, edx);
if (allzero)
continue;
if (!has_subleafs(f))
continue;
- max_subleaf = MAX_SUBLEAF_NUM;
-
/*
* Some can provide the exact number of subleafs,
* others have to be tried (0xf)
@@ -256,20 +291,15 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax)
if (f == 0x80000026)
max_subleaf = 5;
- for (subleaf = 1; subleaf < max_subleaf; subleaf++) {
- eax = f;
- ecx = subleaf;
+ for (u32 subleaf = 1; subleaf < max_subleaf; subleaf++) {
+ cpuid_count(f, subleaf, eax, ebx, ecx, edx);
- cpuid(&eax, &ebx, &ecx, &edx);
- allzero = cpuid_store(range, f, subleaf,
- eax, ebx, ecx, edx);
+ allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx);
if (allzero)
continue;
}
}
-
- return range;
}
/*
@@ -280,15 +310,13 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax)
* 0, 0, EAX, 31:0, max_basic_leafs, Max input value for supported subleafs
* 1, 0, ECX, 0, sse3, Streaming SIMD Extensions 3(SSE3)
*/
-static int parse_line(char *line)
+static void parse_line(char *line)
{
char *str;
- int i;
struct cpuid_range *range;
struct cpuid_func *func;
struct subleaf *leaf;
u32 index;
- u32 sub;
char buffer[512];
char *buf;
/*
@@ -310,12 +338,12 @@ static int parse_line(char *line)
/* Skip comments and NULL line */
if (line[0] == '#' || line[0] == '\n')
- return 0;
+ return;
strncpy(buffer, line, 511);
buffer[511] = 0;
str = buffer;
- for (i = 0; i < 5; i++) {
+ for (int i = 0; i < 5; i++) {
tokens[i] = strtok(str, ",");
if (!tokens[i])
goto err_exit;
@@ -328,21 +356,19 @@ static int parse_line(char *line)
/* index/main-leaf */
index = strtoull(tokens[0], NULL, 0);
- if (index & 0x80000000)
- range = leafs_ext;
- else
- range = leafs_basic;
-
- index &= 0x7FFFFFFF;
- /* Skip line parsing for non-existing indexes */
- if ((int)index >= range->nr)
- return -1;
+ /*
+ * Skip line parsing if the index is not covered by known-valid
+ * CPUID ranges on this CPU.
+ */
+ range = index_to_cpuid_range(index);
+ if (!range)
+ return;
+ /* Skip line parsing if the index CPUID output is all zero */
+ index &= CPUID_FUNCTION_MASK;
func = &range->funcs[index];
-
- /* Return if the index has no valid item on this platform */
if (!func->nr)
- return 0;
+ return;
/* subleaf */
buf = tokens[1];
@@ -355,11 +381,11 @@ static int parse_line(char *line)
subleaf_start = strtoul(start, NULL, 0);
subleaf_end = min(subleaf_end, (u32)(func->nr - 1));
if (subleaf_start > subleaf_end)
- return 0;
+ return;
} else {
subleaf_start = subleaf_end;
if (subleaf_start > (u32)(func->nr - 1))
- return 0;
+ return;
}
/* register */
@@ -382,7 +408,7 @@ static int parse_line(char *line)
bit_end = strtoul(end, NULL, 0);
bit_start = (start) ? strtoul(start, NULL, 0) : bit_end;
- for (sub = subleaf_start; sub <= subleaf_end; sub++) {
+ for (u32 sub = subleaf_start; sub <= subleaf_end; sub++) {
leaf = &func->leafs[sub];
reg = &leaf->info[reg_index];
bdesc = &reg->descs[reg->nr++];
@@ -392,12 +418,11 @@ static int parse_line(char *line)
strcpy(bdesc->simp, strtok(tokens[4], " \t"));
strcpy(bdesc->detail, tokens[5]);
}
- return 0;
+ return;
err_exit:
- printf("Warning: wrong line format:\n");
- printf("\tline[%d]: %s\n", flines, line);
- return -1;
+ warnx("Wrong line format:\n"
+ "\tline[%d]: %s", flines, line);
}
/* Parse csv file, and construct the array of all leafs and subleafs */
@@ -418,10 +443,8 @@ static void parse_text(void)
file = fopen("./cpuid.csv", "r");
}
- if (!file) {
- printf("Fail to open '%s'\n", filename);
- return;
- }
+ if (!file)
+ err(EXIT_FAILURE, "%s", filename);
while (1) {
ret = getline(&line, &len, file);
@@ -436,21 +459,13 @@ static void parse_text(void)
fclose(file);
}
-
-/* Decode every eax/ebx/ecx/edx */
-static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg)
+static void show_reg(const struct reg_desc *rdesc, u32 value)
{
- struct bits_desc *bdesc;
- int start, end, i;
+ const struct bits_desc *bdesc;
+ int start, end;
u32 mask;
- if (!rdesc->nr) {
- if (show_details)
- printf("\t %s: 0x%08x\n", reg_names[reg], value);
- return;
- }
-
- for (i = 0; i < rdesc->nr; i++) {
+ for (int i = 0; i < rdesc->nr; i++) {
bdesc = &rdesc->descs[i];
start = bdesc->start;
@@ -480,23 +495,21 @@ static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg)
}
}
-static void show_leaf(struct subleaf *leaf)
+static void show_reg_header(bool has_entries, u32 leaf, u32 subleaf, const char *reg_name)
{
- if (!leaf)
- return;
+ if (show_details && has_entries)
+ printf("CPUID_0x%x_%s[0x%x]:\n", leaf, reg_name, subleaf);
+}
- if (show_raw) {
+static void show_leaf(struct subleaf *leaf)
+{
+ if (show_raw)
leaf_print_raw(leaf);
- } else {
- if (show_details)
- printf("CPUID_0x%x_ECX[0x%x]:\n",
- leaf->index, leaf->sub);
- }
- decode_bits(leaf->eax, &leaf->info[R_EAX], R_EAX);
- decode_bits(leaf->ebx, &leaf->info[R_EBX], R_EBX);
- decode_bits(leaf->ecx, &leaf->info[R_ECX], R_ECX);
- decode_bits(leaf->edx, &leaf->info[R_EDX], R_EDX);
+ for (int i = R_EAX; i < NR_REGS; i++) {
+ show_reg_header((leaf->info[i].nr > 0), leaf->index, leaf->sub, reg_names[i]);
+ show_reg(&leaf->info[i], leaf->output[i]);
+ }
if (!show_raw && show_details)
printf("\n");
@@ -504,46 +517,37 @@ static void show_leaf(struct subleaf *leaf)
static void show_func(struct cpuid_func *func)
{
- int i;
-
- if (!func)
- return;
-
- for (i = 0; i < func->nr; i++)
+ for (int i = 0; i < func->nr; i++)
show_leaf(&func->leafs[i]);
}
static void show_range(struct cpuid_range *range)
{
- int i;
-
- for (i = 0; i < range->nr; i++)
+ for (int i = 0; i < range->nr; i++)
show_func(&range->funcs[i]);
}
static inline struct cpuid_func *index_to_func(u32 index)
{
+ u32 func_idx = index & CPUID_FUNCTION_MASK;
struct cpuid_range *range;
- u32 func_idx;
-
- range = (index & 0x80000000) ? leafs_ext : leafs_basic;
- func_idx = index & 0xffff;
- if ((func_idx + 1) > (u32)range->nr) {
- printf("ERR: invalid input index (0x%x)\n", index);
+ range = index_to_cpuid_range(index);
+ if (!range)
return NULL;
- }
+
return &range->funcs[func_idx];
}
static void show_info(void)
{
+ struct cpuid_range *range;
struct cpuid_func *func;
if (show_raw) {
/* Show all of the raw output of 'cpuid' instr */
- raw_dump_range(leafs_basic);
- raw_dump_range(leafs_ext);
+ for_each_valid_cpuid_range(range)
+ raw_dump_range(range);
return;
}
@@ -551,18 +555,19 @@ static void show_info(void)
/* Only show specific leaf/subleaf info */
func = index_to_func(user_index);
if (!func)
- return;
+ errx(EXIT_FAILURE, "Invalid input leaf (0x%x)", user_index);
/* Dump the raw data also */
show_raw = true;
if (user_sub != 0xFFFFFFFF) {
- if (user_sub + 1 <= (u32)func->nr) {
- show_leaf(&func->leafs[user_sub]);
- return;
+ if (user_sub + 1 > (u32)func->nr) {
+ errx(EXIT_FAILURE, "Leaf 0x%x has no valid subleaf = 0x%x",
+ user_index, user_sub);
}
- printf("ERR: invalid input subleaf (0x%x)\n", user_sub);
+ show_leaf(&func->leafs[user_sub]);
+ return;
}
show_func(func);
@@ -570,38 +575,21 @@ static void show_info(void)
}
printf("CPU features:\n=============\n\n");
- show_range(leafs_basic);
- show_range(leafs_ext);
+ for_each_valid_cpuid_range(range)
+ show_range(range);
}
-static void setup_platform_cpuid(void)
+static void __noreturn usage(int exit_code)
{
- u32 eax, ebx, ecx, edx;
-
- /* Check vendor */
- eax = ebx = ecx = edx = 0;
- cpuid(&eax, &ebx, &ecx, &edx);
-
- /* "htuA" */
- if (ebx == 0x68747541)
- is_amd = true;
-
- /* Setup leafs for the basic and extended range */
- leafs_basic = setup_cpuid_range(0x0);
- leafs_ext = setup_cpuid_range(0x80000000);
-}
-
-static void usage(void)
-{
- printf("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n"
- "\t-a|--all Show both bit flags and complex bit fields info\n"
- "\t-b|--bitflags Show boolean flags only\n"
- "\t-d|--detail Show details of the flag/fields (default)\n"
- "\t-f|--flags Specify the cpuid csv file\n"
- "\t-h|--help Show usage info\n"
- "\t-l|--leaf=index Specify the leaf you want to check\n"
- "\t-r|--raw Show raw cpuid data\n"
- "\t-s|--subleaf=sub Specify the subleaf you want to check\n"
+ errx(exit_code, "kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n"
+ "\t-a|--all Show both bit flags and complex bit fields info\n"
+ "\t-b|--bitflags Show boolean flags only\n"
+ "\t-d|--detail Show details of the flag/fields (default)\n"
+ "\t-f|--flags Specify the CPUID CSV file\n"
+ "\t-h|--help Show usage info\n"
+ "\t-l|--leaf=index Specify the leaf you want to check\n"
+ "\t-r|--raw Show raw CPUID data\n"
+ "\t-s|--subleaf=sub Specify the subleaf you want to check"
);
}
@@ -617,7 +605,7 @@ static struct option opts[] = {
{ NULL, 0, NULL, 0 }
};
-static int parse_options(int argc, char *argv[])
+static void parse_options(int argc, char *argv[])
{
int c;
@@ -637,9 +625,7 @@ static int parse_options(int argc, char *argv[])
user_csv = optarg;
break;
case 'h':
- usage();
- exit(1);
- break;
+ usage(EXIT_SUCCESS);
case 'l':
/* main leaf */
user_index = strtoul(optarg, NULL, 0);
@@ -652,11 +638,8 @@ static int parse_options(int argc, char *argv[])
user_sub = strtoul(optarg, NULL, 0);
break;
default:
- printf("%s: Invalid option '%c'\n", argv[0], optopt);
- return -1;
- }
-
- return 0;
+ usage(EXIT_FAILURE);
+ }
}
/*
@@ -669,11 +652,13 @@ static int parse_options(int argc, char *argv[])
*/
int main(int argc, char *argv[])
{
- if (parse_options(argc, argv))
- return -1;
+ struct cpuid_range *range;
+
+ parse_options(argc, argv);
/* Setup the cpuid leafs of current platform */
- setup_platform_cpuid();
+ for_each_cpuid_range(range)
+ setup_cpuid_range(range);
/* Read and parse the 'cpuid.csv' */
parse_text();
diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c
index e91d4c4e1c16..bce69c6bfa69 100644
--- a/tools/arch/x86/lib/insn.c
+++ b/tools/arch/x86/lib/insn.c
@@ -324,6 +324,11 @@ int insn_get_opcode(struct insn *insn)
}
insn->attr = inat_get_opcode_attribute(op);
+ if (insn->x86_64 && inat_is_invalid64(insn->attr)) {
+ /* This instruction is invalid, like UD2. Stop decoding. */
+ insn->attr &= INAT_INV64;
+ }
+
while (inat_is_escape(insn->attr)) {
/* Get escaped opcode */
op = get_next(insn_byte_t, insn);
@@ -337,6 +342,7 @@ int insn_get_opcode(struct insn *insn)
insn->attr = 0;
return -EINVAL;
}
+
end:
opcode->got = 1;
return 0;
@@ -658,7 +664,6 @@ int insn_get_immediate(struct insn *insn)
}
if (!inat_has_immediate(insn->attr))
- /* no immediates */
goto done;
switch (inat_immediate_size(insn->attr)) {
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index 0199d56cb479..d66b710d628f 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -3,6 +3,7 @@
#include <linux/export.h>
#include <linux/linkage.h>
+#include <linux/cfi_types.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
@@ -28,7 +29,7 @@
* only for the return value that is the same as the source input,
* which the compiler could/should do much better anyway.
*/
-SYM_FUNC_START(__memset)
+SYM_TYPED_FUNC_START(__memset)
ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS
movq %rdi,%r9
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index caedb3ef6688..262f7ca1fb95 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -35,7 +35,7 @@
# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
#
-# REX2 Prefix
+# REX2 Prefix Superscripts
# - (!REX2): REX2 is not allowed
# - (REX2): REX2 variant e.g. JMPABS
@@ -147,7 +147,7 @@ AVXcode:
# 0x60 - 0x6f
60: PUSHA/PUSHAD (i64)
61: POPA/POPAD (i64)
-62: BOUND Gv,Ma (i64) | EVEX (Prefix)
+62: BOUND Gv,Ma (i64) | EVEX (Prefix),(o64)
63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
64: SEG=FS (Prefix)
65: SEG=GS (Prefix)
@@ -253,8 +253,8 @@ c0: Grp2 Eb,Ib (1A)
c1: Grp2 Ev,Ib (1A)
c2: RETN Iw (f64)
c3: RETN
-c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
-c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
+c4: LES Gz,Mp (i64) | VEX+2byte (Prefix),(o64)
+c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix),(o64)
c6: Grp11A Eb,Ib (1A)
c7: Grp11B Ev,Iz (1A)
c8: ENTER Iw,Ib
@@ -286,10 +286,10 @@ df: ESC
# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
-e0: LOOPNE/LOOPNZ Jb (f64) (!REX2)
-e1: LOOPE/LOOPZ Jb (f64) (!REX2)
-e2: LOOP Jb (f64) (!REX2)
-e3: JrCXZ Jb (f64) (!REX2)
+e0: LOOPNE/LOOPNZ Jb (f64),(!REX2)
+e1: LOOPE/LOOPZ Jb (f64),(!REX2)
+e2: LOOP Jb (f64),(!REX2)
+e3: JrCXZ Jb (f64),(!REX2)
e4: IN AL,Ib (!REX2)
e5: IN eAX,Ib (!REX2)
e6: OUT Ib,AL (!REX2)
@@ -298,10 +298,10 @@ e7: OUT Ib,eAX (!REX2)
# in "near" jumps and calls is 16-bit. For CALL,
# push of return address is 16-bit wide, RSP is decremented by 2
# but is not truncated to 16 bits, unlike RIP.
-e8: CALL Jz (f64) (!REX2)
-e9: JMP-near Jz (f64) (!REX2)
-ea: JMP-far Ap (i64) (!REX2)
-eb: JMP-short Jb (f64) (!REX2)
+e8: CALL Jz (f64),(!REX2)
+e9: JMP-near Jz (f64),(!REX2)
+ea: JMP-far Ap (i64),(!REX2)
+eb: JMP-short Jb (f64),(!REX2)
ec: IN AL,DX (!REX2)
ed: IN eAX,DX (!REX2)
ee: OUT DX,AL (!REX2)
@@ -478,22 +478,22 @@ AVXcode: 1
7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev)
# 0x0f 0x80-0x8f
# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
-80: JO Jz (f64) (!REX2)
-81: JNO Jz (f64) (!REX2)
-82: JB/JC/JNAE Jz (f64) (!REX2)
-83: JAE/JNB/JNC Jz (f64) (!REX2)
-84: JE/JZ Jz (f64) (!REX2)
-85: JNE/JNZ Jz (f64) (!REX2)
-86: JBE/JNA Jz (f64) (!REX2)
-87: JA/JNBE Jz (f64) (!REX2)
-88: JS Jz (f64) (!REX2)
-89: JNS Jz (f64) (!REX2)
-8a: JP/JPE Jz (f64) (!REX2)
-8b: JNP/JPO Jz (f64) (!REX2)
-8c: JL/JNGE Jz (f64) (!REX2)
-8d: JNL/JGE Jz (f64) (!REX2)
-8e: JLE/JNG Jz (f64) (!REX2)
-8f: JNLE/JG Jz (f64) (!REX2)
+80: JO Jz (f64),(!REX2)
+81: JNO Jz (f64),(!REX2)
+82: JB/JC/JNAE Jz (f64),(!REX2)
+83: JAE/JNB/JNC Jz (f64),(!REX2)
+84: JE/JZ Jz (f64),(!REX2)
+85: JNE/JNZ Jz (f64),(!REX2)
+86: JBE/JNA Jz (f64),(!REX2)
+87: JA/JNBE Jz (f64),(!REX2)
+88: JS Jz (f64),(!REX2)
+89: JNS Jz (f64),(!REX2)
+8a: JP/JPE Jz (f64),(!REX2)
+8b: JNP/JPO Jz (f64),(!REX2)
+8c: JL/JNGE Jz (f64),(!REX2)
+8d: JNL/JGE Jz (f64),(!REX2)
+8e: JLE/JNG Jz (f64),(!REX2)
+8f: JNLE/JG Jz (f64),(!REX2)
# 0x0f 0x90-0x9f
90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66)
91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66)
@@ -996,8 +996,8 @@ AVXcode: 4
83: Grp1 Ev,Ib (1A),(es)
# CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL,
# CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ
-84: CTESTSCC (ev)
-85: CTESTSCC (es) | CTESTSCC (66),(es)
+84: CTESTSCC Eb,Gb (ev)
+85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es)
88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es)
8f: POP2 Bq,Rq (000),(11B),(ev)
a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es)
diff --git a/tools/arch/x86/tools/gen-insn-attr-x86.awk b/tools/arch/x86/tools/gen-insn-attr-x86.awk
index 5770c8097f32..2c19d7fc8a85 100644
--- a/tools/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/tools/arch/x86/tools/gen-insn-attr-x86.awk
@@ -64,6 +64,8 @@ BEGIN {
modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
force64_expr = "\\([df]64\\)"
+ invalid64_expr = "\\(i64\\)"
+ only64_expr = "\\(o64\\)"
rex_expr = "^((REX(\\.[XRWB]+)+)|(REX$))"
rex2_expr = "\\(REX2\\)"
no_rex2_expr = "\\(!REX2\\)"
@@ -319,6 +321,11 @@ function convert_operands(count,opnd, i,j,imm,mod)
if (match(ext, force64_expr))
flags = add_flags(flags, "INAT_FORCE64")
+ # check invalid in 64-bit (and no only64)
+ if (match(ext, invalid64_expr) &&
+ !match($0, only64_expr))
+ flags = add_flags(flags, "INAT_INV64")
+
# check REX2 not allowed
if (match(ext, no_rex2_expr))
flags = add_flags(flags, "INAT_NO_REX2")
diff --git a/tools/counter/.gitignore b/tools/counter/.gitignore
index 9fd290d4bf43..22d8727d2696 100644
--- a/tools/counter/.gitignore
+++ b/tools/counter/.gitignore
@@ -1,2 +1,3 @@
/counter_example
+/counter_watch_events
/include/linux/counter.h
diff --git a/tools/counter/counter_watch_events.c b/tools/counter/counter_watch_events.c
index 107631e0f2e3..15e21b0c5ffd 100644
--- a/tools/counter/counter_watch_events.c
+++ b/tools/counter/counter_watch_events.c
@@ -38,6 +38,7 @@ static const char * const counter_event_type_name[] = {
"COUNTER_EVENT_INDEX",
"COUNTER_EVENT_CHANGE_OF_STATE",
"COUNTER_EVENT_CAPTURE",
+ "COUNTER_EVENT_DIRECTION_CHANGE",
};
static const char * const counter_component_type_name[] = {
@@ -118,6 +119,7 @@ static void print_usage(void)
" evt_index (COUNTER_EVENT_INDEX)\n"
" evt_change_of_state (COUNTER_EVENT_CHANGE_OF_STATE)\n"
" evt_capture (COUNTER_EVENT_CAPTURE)\n"
+ " evt_direction_change (COUNTER_EVENT_DIRECTION_CHANGE)\n"
"\n"
" chan=<n> channel <n> for this watch [default: 0]\n"
" id=<n> component id <n> for this watch [default: 0]\n"
@@ -157,6 +159,7 @@ enum {
WATCH_EVENT_INDEX,
WATCH_EVENT_CHANGE_OF_STATE,
WATCH_EVENT_CAPTURE,
+ WATCH_EVENT_DIRECTION_CHANGE,
WATCH_CHANNEL,
WATCH_ID,
WATCH_PARENT,
@@ -183,6 +186,7 @@ static char * const counter_watch_subopts[WATCH_SUBOPTS_MAX + 1] = {
[WATCH_EVENT_INDEX] = "evt_index",
[WATCH_EVENT_CHANGE_OF_STATE] = "evt_change_of_state",
[WATCH_EVENT_CAPTURE] = "evt_capture",
+ [WATCH_EVENT_DIRECTION_CHANGE] = "evt_direction_change",
/* channel, id, parent */
[WATCH_CHANNEL] = "chan",
[WATCH_ID] = "id",
@@ -278,6 +282,7 @@ int main(int argc, char **argv)
case WATCH_EVENT_INDEX:
case WATCH_EVENT_CHANGE_OF_STATE:
case WATCH_EVENT_CAPTURE:
+ case WATCH_EVENT_DIRECTION_CHANGE:
/* match counter_event_type: subtract enum value */
ret -= WATCH_EVENT_OVERFLOW;
watches[i].event = ret;
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 04ba035d67e9..b9ce3aab15fe 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -24,6 +24,7 @@
#include <sys/poll.h>
#include <sys/utsname.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -677,6 +678,88 @@ static void kvp_process_ipconfig_file(char *cmd,
pclose(file);
}
+static bool kvp_verify_ip_address(const void *address_string)
+{
+ char verify_buf[sizeof(struct in6_addr)];
+
+ if (inet_pton(AF_INET, address_string, verify_buf) == 1)
+ return true;
+ if (inet_pton(AF_INET6, address_string, verify_buf) == 1)
+ return true;
+ return false;
+}
+
+static void kvp_extract_routes(const char *line, void **output, size_t *remaining)
+{
+ static const char needle[] = "via ";
+ const char *match, *haystack = line;
+
+ while ((match = strstr(haystack, needle))) {
+ const char *address, *next_char;
+
+ /* Address starts after needle. */
+ address = match + strlen(needle);
+
+ /* The char following address is a space or end of line. */
+ next_char = strpbrk(address, " \t\\");
+ if (!next_char)
+ next_char = address + strlen(address) + 1;
+
+ /* Enough room for address and semicolon. */
+ if (*remaining >= (next_char - address) + 1) {
+ memcpy(*output, address, next_char - address);
+ /* Terminate string for verification. */
+ memcpy(*output + (next_char - address), "", 1);
+ if (kvp_verify_ip_address(*output)) {
+ /* Advance output buffer. */
+ *output += next_char - address;
+ *remaining -= next_char - address;
+
+ /* Each address needs a trailing semicolon. */
+ memcpy(*output, ";", 1);
+ *output += 1;
+ *remaining -= 1;
+ }
+ }
+ haystack = next_char;
+ }
+}
+
+static void kvp_get_gateway(void *buffer, size_t buffer_len)
+{
+ static const char needle[] = "default ";
+ FILE *f;
+ void *output = buffer;
+ char *line = NULL;
+ size_t alloc_size = 0, remaining = buffer_len - 1;
+ ssize_t num_chars;
+
+ /* Show route information in a single line, for each address family */
+ f = popen("ip --oneline -4 route show;ip --oneline -6 route show", "r");
+ if (!f) {
+ /* Convert buffer into C-String. */
+ memcpy(output, "", 1);
+ return;
+ }
+ while ((num_chars = getline(&line, &alloc_size, f)) > 0) {
+ /* Skip short lines. */
+ if (num_chars <= strlen(needle))
+ continue;
+ /* Skip lines without default route. */
+ if (memcmp(line, needle, strlen(needle)))
+ continue;
+ /* Remove trailing newline to simplify further parsing. */
+ if (line[num_chars - 1] == '\n')
+ line[num_chars - 1] = '\0';
+ /* Search routes after match. */
+ kvp_extract_routes(line + strlen(needle), &output, &remaining);
+ }
+ /* Convert buffer into C-String. */
+ memcpy(output, "", 1);
+ free(line);
+ pclose(f);
+}
+
static void kvp_get_ipconfig_info(char *if_name,
struct hv_kvp_ipaddr_value *buffer)
{
@@ -685,30 +768,7 @@ static void kvp_get_ipconfig_info(char *if_name,
char *p;
FILE *file;
- /*
- * Get the address of default gateway (ipv4).
- */
- sprintf(cmd, "%s %s", "ip route show dev", if_name);
- strcat(cmd, " | awk '/default/ {print $3 }'");
-
- /*
- * Execute the command to gather gateway info.
- */
- kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way,
- (MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0);
-
- /*
- * Get the address of default gateway (ipv6).
- */
- sprintf(cmd, "%s %s", "ip -f inet6 route show dev", if_name);
- strcat(cmd, " | awk '/default/ {print $3 }'");
-
- /*
- * Execute the command to gather gateway info (ipv6).
- */
- kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way,
- (MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1);
-
+ kvp_get_gateway(buffer->gate_way, sizeof(buffer->gate_way));
/*
* Gather the DNS state.
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index cccf62ea2b8f..eab7b082f19d 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -75,6 +75,7 @@ static const char * const iio_ev_type_text[] = {
[IIO_EV_TYPE_CHANGE] = "change",
[IIO_EV_TYPE_MAG_REFERENCED] = "mag_referenced",
[IIO_EV_TYPE_GESTURE] = "gesture",
+ [IIO_EV_TYPE_FAULT] = "fault",
};
static const char * const iio_ev_dir_text[] = {
@@ -83,6 +84,7 @@ static const char * const iio_ev_dir_text[] = {
[IIO_EV_DIR_FALLING] = "falling",
[IIO_EV_DIR_SINGLETAP] = "singletap",
[IIO_EV_DIR_DOUBLETAP] = "doubletap",
+ [IIO_EV_DIR_FAULT_OPENWIRE] = "openwire",
};
static const char * const iio_modifier_names[] = {
@@ -249,6 +251,7 @@ static bool event_is_known(struct iio_event_data *event)
case IIO_EV_TYPE_MAG_ADAPTIVE:
case IIO_EV_TYPE_CHANGE:
case IIO_EV_TYPE_GESTURE:
+ case IIO_EV_TYPE_FAULT:
break;
default:
return false;
@@ -260,6 +263,7 @@ static bool event_is_known(struct iio_event_data *event)
case IIO_EV_DIR_FALLING:
case IIO_EV_DIR_SINGLETAP:
case IIO_EV_DIR_DOUBLETAP:
+ case IIO_EV_DIR_FAULT_OPENWIRE:
case IIO_EV_DIR_NONE:
break;
default:
diff --git a/tools/include/asm/timex.h b/tools/include/asm/timex.h
new file mode 100644
index 000000000000..5adfe3c6d326
--- /dev/null
+++ b/tools/include/asm/timex.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_LINUX_ASM_TIMEX_H
+#define __TOOLS_LINUX_ASM_TIMEX_H
+
+#include <time.h>
+
+#define cycles_t clock_t
+
+static inline cycles_t get_cycles(void)
+{
+ return clock();
+}
+#endif // __TOOLS_LINUX_ASM_TIMEX_H
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index 2a7f260ef9dc..d4d300040d01 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -19,6 +19,7 @@ bool __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits);
bool __bitmap_equal(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits);
+void __bitmap_set(unsigned long *map, unsigned int start, int len);
void __bitmap_clear(unsigned long *map, unsigned int start, int len);
bool __bitmap_intersects(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int bits);
@@ -79,6 +80,11 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
__bitmap_or(dst, src1, src2, nbits);
}
+static inline unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags __maybe_unused)
+{
+ return malloc(bitmap_size(nbits));
+}
+
/**
* bitmap_zalloc - Allocate bitmap
* @nbits: Number of bits
@@ -150,6 +156,21 @@ static inline bool bitmap_intersects(const unsigned long *src1,
return __bitmap_intersects(src1, src2, nbits);
}
+static inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits)
+{
+ if (__builtin_constant_p(nbits) && nbits == 1)
+ __set_bit(start, map);
+ else if (small_const_nbits(start + nbits))
+ *map |= GENMASK(start + nbits - 1, start);
+ else if (__builtin_constant_p(start & BITMAP_MEM_MASK) &&
+ IS_ALIGNED(start, BITMAP_MEM_ALIGNMENT) &&
+ __builtin_constant_p(nbits & BITMAP_MEM_MASK) &&
+ IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT))
+ memset((char *)map + start / 8, 0xff, nbits / 8);
+ else
+ __bitmap_set(map, start, nbits);
+}
+
static inline void bitmap_clear(unsigned long *map, unsigned int start,
unsigned int nbits)
{
diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h
new file mode 100644
index 000000000000..6b8713675765
--- /dev/null
+++ b/tools/include/linux/cfi_types.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Clang Control Flow Integrity (CFI) type definitions.
+ */
+#ifndef _LINUX_CFI_TYPES_H
+#define _LINUX_CFI_TYPES_H
+
+#ifdef __ASSEMBLY__
+#include <linux/linkage.h>
+
+#ifdef CONFIG_CFI_CLANG
+/*
+ * Use the __kcfi_typeid_<function> type identifier symbol to
+ * annotate indirectly called assembly functions. The compiler emits
+ * these symbols for all address-taken function declarations in C
+ * code.
+ */
+#ifndef __CFI_TYPE
+#define __CFI_TYPE(name) \
+ .4byte __kcfi_typeid_##name
+#endif
+
+#define SYM_TYPED_ENTRY(name, linkage, align...) \
+ linkage(name) ASM_NL \
+ align ASM_NL \
+ __CFI_TYPE(name) ASM_NL \
+ name:
+
+#define SYM_TYPED_START(name, linkage, align...) \
+ SYM_TYPED_ENTRY(name, linkage, align)
+
+#else /* CONFIG_CFI_CLANG */
+
+#define SYM_TYPED_START(name, linkage, align...) \
+ SYM_START(name, linkage, align)
+
+#endif /* CONFIG_CFI_CLANG */
+
+#ifndef SYM_TYPED_FUNC_START
+#define SYM_TYPED_FUNC_START(name) \
+ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* _LINUX_CFI_TYPES_H */
diff --git a/tools/include/linux/container_of.h b/tools/include/linux/container_of.h
new file mode 100644
index 000000000000..c879e14c3dd6
--- /dev/null
+++ b/tools/include/linux/container_of.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_CONTAINER_OF_H
+#define _TOOLS_LINUX_CONTAINER_OF_H
+
+#ifndef container_of
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr: the pointer to the member.
+ * @type: the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({ \
+ const typeof(((type *)0)->member) * __mptr = (ptr); \
+ (type *)((char *)__mptr - offsetof(type, member)); })
+#endif
+
+#endif /* _TOOLS_LINUX_CONTAINER_OF_H */
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 07cfad817d53..c8c18d3908a9 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -11,6 +11,7 @@
#include <linux/panic.h>
#include <endian.h>
#include <byteswap.h>
+#include <linux/container_of.h>
#ifndef UINT_MAX
#define UINT_MAX (~0U)
@@ -25,19 +26,6 @@
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif
-#ifndef container_of
-/**
- * container_of - cast a member of a structure out to the containing structure
- * @ptr: the pointer to the member.
- * @type: the type of the container struct this is embedded in.
- * @member: the name of the member within the struct.
- *
- */
-#define container_of(ptr, type, member) ({ \
- const typeof(((type *)0)->member) * __mptr = (ptr); \
- (type *)((char *)__mptr - offsetof(type, member)); })
-#endif
-
#ifndef max
#define max(x, y) ({ \
typeof(x) _max1 = (x); \
diff --git a/tools/include/linux/math64.h b/tools/include/linux/math64.h
index 4ad45d5943dc..8a67d478bf19 100644
--- a/tools/include/linux/math64.h
+++ b/tools/include/linux/math64.h
@@ -72,4 +72,9 @@ static inline u64 mul_u64_u64_div64(u64 a, u64 b, u64 c)
}
#endif
+static inline u64 div_u64(u64 dividend, u32 divisor)
+{
+ return dividend / divisor;
+}
+
#endif /* _LINUX_MATH64_H */
diff --git a/tools/include/linux/moduleparam.h b/tools/include/linux/moduleparam.h
new file mode 100644
index 000000000000..4c4d05bef0cb
--- /dev/null
+++ b/tools/include/linux/moduleparam.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_MODULE_PARAMS_H
+#define _TOOLS_LINUX_MODULE_PARAMS_H
+
+#define MODULE_PARM_DESC(parm, desc)
+
+#endif // _TOOLS_LINUX_MODULE_PARAMS_H
diff --git a/tools/include/linux/prandom.h b/tools/include/linux/prandom.h
new file mode 100644
index 000000000000..b745041ccd6a
--- /dev/null
+++ b/tools/include/linux/prandom.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_LINUX_PRANDOM_H
+#define __TOOLS_LINUX_PRANDOM_H
+
+#include <linux/types.h>
+
+struct rnd_state {
+ __u32 s1, s2, s3, s4;
+};
+
+/*
+ * Handle minimum values for seeds
+ */
+static inline u32 __seed(u32 x, u32 m)
+{
+ return (x < m) ? x + m : x;
+}
+
+/**
+ * prandom_seed_state - set seed for prandom_u32_state().
+ * @state: pointer to state structure to receive the seed.
+ * @seed: arbitrary 64-bit value to use as a seed.
+ */
+static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
+{
+ u32 i = ((seed >> 32) ^ (seed << 10) ^ seed) & 0xffffffffUL;
+
+ state->s1 = __seed(i, 2U);
+ state->s2 = __seed(i, 8U);
+ state->s3 = __seed(i, 16U);
+ state->s4 = __seed(i, 128U);
+}
+
+/**
+ * prandom_u32_state - seeded pseudo-random number generator.
+ * @state: pointer to state structure holding seeded state.
+ *
+ * This is used for pseudo-randomness with no outside seeding.
+ * For more random results, use get_random_u32().
+ */
+static inline u32 prandom_u32_state(struct rnd_state *state)
+{
+#define TAUSWORTHE(s, a, b, c, d) (((s & c) << d) ^ (((s << a) ^ s) >> b))
+ state->s1 = TAUSWORTHE(state->s1, 6U, 13U, 4294967294U, 18U);
+ state->s2 = TAUSWORTHE(state->s2, 2U, 27U, 4294967288U, 2U);
+ state->s3 = TAUSWORTHE(state->s3, 13U, 21U, 4294967280U, 7U);
+ state->s4 = TAUSWORTHE(state->s4, 3U, 12U, 4294967168U, 13U);
+
+ return (state->s1 ^ state->s2 ^ state->s3 ^ state->s4);
+}
+#endif // __TOOLS_LINUX_PRANDOM_H
diff --git a/tools/include/linux/refcount.h b/tools/include/linux/refcount.h
index 36cb29bc57c2..1f30956e070d 100644
--- a/tools/include/linux/refcount.h
+++ b/tools/include/linux/refcount.h
@@ -60,6 +60,11 @@ static inline void refcount_set(refcount_t *r, unsigned int n)
atomic_set(&r->refs, n);
}
+static inline void refcount_set_release(refcount_t *r, unsigned int n)
+{
+ atomic_set(&r->refs, n);
+}
+
static inline unsigned int refcount_read(const refcount_t *r)
{
return atomic_read(&r->refs);
diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h
index 51b25e9c4ec7..c87051e2b26f 100644
--- a/tools/include/linux/slab.h
+++ b/tools/include/linux/slab.h
@@ -12,6 +12,7 @@
void *kmalloc(size_t size, gfp_t gfp);
void kfree(void *p);
+void *kmalloc_array(size_t n, size_t size, gfp_t gfp);
bool slab_is_available(void);
diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h
index 8519386acd23..4928e33d44ac 100644
--- a/tools/include/linux/types.h
+++ b/tools/include/linux/types.h
@@ -42,6 +42,8 @@ typedef __s16 s16;
typedef __u8 u8;
typedef __s8 s8;
+typedef unsigned long long ullong;
+
#ifdef __CHECKER__
#define __bitwise __attribute__((bitwise))
#else
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
index 1ea2c4c33b86..ef1c27fa3c57 100644
--- a/tools/include/uapi/asm-generic/mman-common.h
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -85,6 +85,7 @@
/* compatibility flags */
#define MAP_FILE 0
+#define PKEY_UNRESTRICTED 0x0
#define PKEY_DISABLE_ACCESS 0x1
#define PKEY_DISABLE_WRITE 0x2
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 88dc393c2bca..2892a45023af 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -849,9 +849,11 @@ __SYSCALL(__NR_getxattrat, sys_getxattrat)
__SYSCALL(__NR_listxattrat, sys_listxattrat)
#define __NR_removexattrat 466
__SYSCALL(__NR_removexattrat, sys_removexattrat)
+#define __NR_open_tree_attr 467
+__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
#undef __NR_syscalls
-#define __NR_syscalls 467
+#define __NR_syscalls 468
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 28705ae67784..fd404729b115 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4968,6 +4968,9 @@ union bpf_attr {
* the netns switch takes place from ingress to ingress without
* going through the CPU's backlog queue.
*
+ * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during
+ * the netns switch.
+ *
* The *flags* argument is reserved and must be 0. The helper is
* currently only supported for tc BPF program types at the
* ingress hook and for veth and netkit target device types. The
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 5d32d53508d9..ced0fc3c3aa5 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -79,6 +79,8 @@ enum {
#define IPPROTO_MPLS IPPROTO_MPLS
IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */
#define IPPROTO_ETHERNET IPPROTO_ETHERNET
+ IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */
+#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
IPPROTO_SMC = 256, /* Shared Memory Communications */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 502ea63b5d2e..b6ae8ad8934b 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -617,10 +617,6 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
-#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
- KVM_X86_DISABLE_EXITS_HLT | \
- KVM_X86_DISABLE_EXITS_PAUSE | \
- KVM_X86_DISABLE_EXITS_CSTATE)
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {
@@ -933,6 +929,7 @@ struct kvm_enable_cap {
#define KVM_CAP_PRE_FAULT_MEMORY 236
#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
#define KVM_CAP_X86_GUEST_MODE 238
+#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239
struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1070,6 +1067,10 @@ struct kvm_dirty_tlb {
#define KVM_REG_SIZE_SHIFT 52
#define KVM_REG_SIZE_MASK 0x00f0000000000000ULL
+
+#define KVM_REG_SIZE(id) \
+ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
#define KVM_REG_SIZE_U8 0x0000000000000000ULL
#define KVM_REG_SIZE_U16 0x0010000000000000ULL
#define KVM_REG_SIZE_U32 0x0020000000000000ULL
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 0524d541d4e3..5fc753c23734 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -385,6 +385,8 @@ enum perf_event_read_format {
*
* @sample_max_stack: Max number of frame pointers in a callchain,
* should be < /proc/sys/kernel/perf_event_max_stack
+ * Max number of entries of branch stack
+ * should be < hardware limit
*/
struct perf_event_attr {
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 887a25286441..f78ee3670dd5 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -98,43 +98,93 @@ struct statx_timestamp {
*/
struct statx {
/* 0x00 */
- __u32 stx_mask; /* What results were written [uncond] */
- __u32 stx_blksize; /* Preferred general I/O size [uncond] */
- __u64 stx_attributes; /* Flags conveying information about the file [uncond] */
+ /* What results were written [uncond] */
+ __u32 stx_mask;
+
+ /* Preferred general I/O size [uncond] */
+ __u32 stx_blksize;
+
+ /* Flags conveying information about the file [uncond] */
+ __u64 stx_attributes;
+
/* 0x10 */
- __u32 stx_nlink; /* Number of hard links */
- __u32 stx_uid; /* User ID of owner */
- __u32 stx_gid; /* Group ID of owner */
- __u16 stx_mode; /* File mode */
+ /* Number of hard links */
+ __u32 stx_nlink;
+
+ /* User ID of owner */
+ __u32 stx_uid;
+
+ /* Group ID of owner */
+ __u32 stx_gid;
+
+ /* File mode */
+ __u16 stx_mode;
__u16 __spare0[1];
+
/* 0x20 */
- __u64 stx_ino; /* Inode number */
- __u64 stx_size; /* File size */
- __u64 stx_blocks; /* Number of 512-byte blocks allocated */
- __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
+ /* Inode number */
+ __u64 stx_ino;
+
+ /* File size */
+ __u64 stx_size;
+
+ /* Number of 512-byte blocks allocated */
+ __u64 stx_blocks;
+
+ /* Mask to show what's supported in stx_attributes */
+ __u64 stx_attributes_mask;
+
/* 0x40 */
- struct statx_timestamp stx_atime; /* Last access time */
- struct statx_timestamp stx_btime; /* File creation time */
- struct statx_timestamp stx_ctime; /* Last attribute change time */
- struct statx_timestamp stx_mtime; /* Last data modification time */
+ /* Last access time */
+ struct statx_timestamp stx_atime;
+
+ /* File creation time */
+ struct statx_timestamp stx_btime;
+
+ /* Last attribute change time */
+ struct statx_timestamp stx_ctime;
+
+ /* Last data modification time */
+ struct statx_timestamp stx_mtime;
+
/* 0x80 */
- __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
+ /* Device ID of special file [if bdev/cdev] */
+ __u32 stx_rdev_major;
__u32 stx_rdev_minor;
- __u32 stx_dev_major; /* ID of device containing file [uncond] */
+
+ /* ID of device containing file [uncond] */
+ __u32 stx_dev_major;
__u32 stx_dev_minor;
+
/* 0x90 */
__u64 stx_mnt_id;
- __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
- __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
+
+ /* Memory buffer alignment for direct I/O */
+ __u32 stx_dio_mem_align;
+
+ /* File offset alignment for direct I/O */
+ __u32 stx_dio_offset_align;
+
/* 0xa0 */
- __u64 stx_subvol; /* Subvolume identifier */
- __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */
- __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */
+ /* Subvolume identifier */
+ __u64 stx_subvol;
+
+ /* Min atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_min;
+
+ /* Max atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max;
+
/* 0xb0 */
- __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */
- __u32 __spare1[1];
+ /* Max atomic write segment count */
+ __u32 stx_atomic_write_segments_max;
+
+ /* File offset alignment for direct I/O reads */
+ __u32 stx_dio_read_offset_align;
+
/* 0xb8 */
__u64 __spare3[9]; /* Spare space for future expansion */
+
/* 0x100 */
};
@@ -164,6 +214,7 @@ struct statx {
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */
+#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
diff --git a/tools/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h
index 91fa51a9c31d..85aa327245c6 100644
--- a/tools/include/uapi/linux/types.h
+++ b/tools/include/uapi/linux/types.h
@@ -4,6 +4,8 @@
#include <asm-generic/int-ll64.h>
+#ifndef __ASSEMBLER__
+
/* copied from linux:include/uapi/linux/types.h */
#define __bitwise
typedef __u16 __bitwise __le16;
@@ -20,4 +22,5 @@ typedef __u32 __bitwise __wsum;
#define __aligned_be64 __be64 __attribute__((aligned(8)))
#define __aligned_le64 __le64 __attribute__((aligned(8)))
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_LINUX_TYPES_H */
diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c
index 2178862bb114..51255c69754d 100644
--- a/tools/lib/bitmap.c
+++ b/tools/lib/bitmap.c
@@ -101,6 +101,26 @@ bool __bitmap_intersects(const unsigned long *bitmap1,
return false;
}
+void __bitmap_set(unsigned long *map, unsigned int start, int len)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const unsigned int size = start + len;
+ int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+ while (len - bits_to_set >= 0) {
+ *p |= mask_to_set;
+ len -= bits_to_set;
+ bits_to_set = BITS_PER_LONG;
+ mask_to_set = ~0UL;
+ p++;
+ }
+ if (len) {
+ mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+ *p |= mask_to_set;
+ }
+}
+
void __bitmap_clear(unsigned long *map, unsigned int start, int len)
{
unsigned long *p = map + BIT_WORD(start);
diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index ffcfd777c451..7fbb50b74c00 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -42,6 +42,7 @@ libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
TEST_ARGS := $(if $(V),-v)
INCLUDES = \
+-I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi \
-I$(srctree)/tools/lib/perf/include \
-I$(srctree)/tools/lib/ \
-I$(srctree)/tools/include \
@@ -99,7 +100,16 @@ $(LIBAPI)-clean:
$(call QUIET_CLEAN, libapi)
$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
-$(LIBPERF_IN): FORCE
+uapi-asm := $(OUTPUT)arch/$(SRCARCH)/include/generated/uapi/asm
+ifeq ($(SRCARCH),arm64)
+ syscall-y := $(uapi-asm)/unistd_64.h
+endif
+uapi-asm-generic:
+ $(if $(syscall-y),\
+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-headers obj=$(uapi-asm) \
+ generic=include/uapi/asm-generic $(syscall-y),)
+
+$(LIBPERF_IN): uapi-asm-generic FORCE
$(Q)$(MAKE) $(build)=libperf
$(LIBPERF_A): $(LIBPERF_IN)
@@ -120,7 +130,7 @@ all: fixdep
clean: $(LIBAPI)-clean
$(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \
*.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd tests/*.o LIBPERF-CFLAGS $(LIBPERF_PC) \
- $(TESTS_STATIC) $(TESTS_SHARED)
+ $(TESTS_STATIC) $(TESTS_SHARED) $(syscall-y)
TESTS_IN = tests-in.o
diff --git a/tools/lib/slab.c b/tools/lib/slab.c
index 959997fb0652..981a21404f32 100644
--- a/tools/lib/slab.c
+++ b/tools/lib/slab.c
@@ -36,3 +36,19 @@ void kfree(void *p)
printf("Freeing %p to malloc\n", p);
free(p);
}
+
+void *kmalloc_array(size_t n, size_t size, gfp_t gfp)
+{
+ void *ret;
+
+ if (!(gfp & __GFP_DIRECT_RECLAIM))
+ return NULL;
+
+ ret = calloc(n, size);
+ uatomic_inc(&kmalloc_nr_allocated);
+ if (kmalloc_verbose)
+ printf("Allocating %p from calloc\n", ret);
+ if (gfp & __GFP_ZERO)
+ memset(ret, 0, n * size);
+ return ret;
+}
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index ce32cb35007d..c4da34048ef8 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -364,7 +364,7 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr)
"Invalid attribute (binary %s)", policy->name);
return -1;
case YNL_PT_NUL_STR:
- if ((!policy->len || len <= policy->len) && !data[len - 1])
+ if (len && (!policy->len || len <= policy->len) && !data[len - 1])
break;
yerr(yarg->ys, YNL_ERROR_ATTR_INVALID,
"Invalid attribute (string %s)", policy->name);
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index a1427c537030..30c0a34b2784 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -162,9 +162,15 @@ class Type(SpecAttr):
def free_needs_iter(self):
return False
- def free(self, ri, var, ref):
+ def _free_lines(self, ri, var, ref):
if self.is_multi_val() or self.presence_type() == 'len':
- ri.cw.p(f'free({var}->{ref}{self.c_name});')
+ return [f'free({var}->{ref}{self.c_name});']
+ return []
+
+ def free(self, ri, var, ref):
+ lines = self._free_lines(ri, var, ref)
+ for line in lines:
+ ri.cw.p(line)
def arg_member(self, ri):
member = self._complex_member_type(ri)
@@ -263,6 +269,10 @@ class Type(SpecAttr):
var = "req"
member = f"{var}->{'.'.join(ref)}"
+ local_vars = []
+ if self.free_needs_iter():
+ local_vars += ['unsigned int i;']
+
code = []
presence = ''
for i in range(0, len(ref)):
@@ -272,6 +282,10 @@ class Type(SpecAttr):
if i == len(ref) - 1 and self.presence_type() != 'bit':
continue
code.append(presence + ' = 1;')
+ ref_path = '.'.join(ref[:-1])
+ if ref_path:
+ ref_path += '.'
+ code += self._free_lines(ri, var, ref_path)
code += self._setter_lines(ri, member, presence)
func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}"
@@ -279,7 +293,8 @@ class Type(SpecAttr):
alloc = bool([x for x in code if 'alloc(' in x])
if free and not alloc:
func_name = '__' + func_name
- ri.cw.write_func('static inline void', func_name, body=code,
+ ri.cw.write_func('static inline void', func_name, local_vars=local_vars,
+ body=code,
args=[f'{type_name(ri, direction, deref=deref)} *{var}'] + self.arg_member(ri))
@@ -482,8 +497,7 @@ class TypeString(Type):
['unsigned int len;']
def _setter_lines(self, ri, member, presence):
- return [f"free({member});",
- f"{presence}_len = strlen({self.c_name});",
+ return [f"{presence}_len = strlen({self.c_name});",
f"{member} = malloc({presence}_len + 1);",
f'memcpy({member}, {self.c_name}, {presence}_len);',
f'{member}[{presence}_len] = 0;']
@@ -536,8 +550,7 @@ class TypeBinary(Type):
['unsigned int len;']
def _setter_lines(self, ri, member, presence):
- return [f"free({member});",
- f"{presence}_len = len;",
+ return [f"{presence}_len = len;",
f"{member} = malloc({presence}_len);",
f'memcpy({member}, {self.c_name}, {presence}_len);']
@@ -574,12 +587,14 @@ class TypeNest(Type):
def _complex_member_type(self, ri):
return self.nested_struct_type
- def free(self, ri, var, ref):
+ def _free_lines(self, ri, var, ref):
+ lines = []
at = '&'
if self.is_recursive_for_op(ri):
at = ''
- ri.cw.p(f'if ({var}->{ref}{self.c_name})')
- ri.cw.p(f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});')
+ lines += [f'if ({var}->{ref}{self.c_name})']
+ lines += [f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});']
+ return lines
def _attr_typol(self):
return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
@@ -632,15 +647,19 @@ class TypeMultiAttr(Type):
def free_needs_iter(self):
return 'type' not in self.attr or self.attr['type'] == 'nest'
- def free(self, ri, var, ref):
+ def _free_lines(self, ri, var, ref):
+ lines = []
if self.attr['type'] in scalars:
- ri.cw.p(f"free({var}->{ref}{self.c_name});")
+ lines += [f"free({var}->{ref}{self.c_name});"]
elif 'type' not in self.attr or self.attr['type'] == 'nest':
- ri.cw.p(f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)")
- ri.cw.p(f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);')
- ri.cw.p(f"free({var}->{ref}{self.c_name});")
+ lines += [
+ f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)",
+ f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);',
+ f"free({var}->{ref}{self.c_name});",
+ ]
else:
raise Exception(f"Free of MultiAttr sub-type {self.attr['type']} not supported yet")
+ return lines
def _attr_policy(self, policy):
return self.base_type._attr_policy(policy)
@@ -654,10 +673,10 @@ class TypeMultiAttr(Type):
def attr_put(self, ri, var):
if self.attr['type'] in scalars:
put_type = self.type
- ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)")
+ ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)")
ri.cw.p(f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);")
elif 'type' not in self.attr or self.attr['type'] == 'nest':
- ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)")
+ ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)")
self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " +
f"{self.enum_name}, &{var}->{self.c_name}[i])")
else:
@@ -666,8 +685,7 @@ class TypeMultiAttr(Type):
def _setter_lines(self, ri, member, presence):
# For multi-attr we have a count, not presence, hack up the presence
presence = presence[:-(len('_present.') + len(self.c_name))] + "n_" + self.c_name
- return [f"free({member});",
- f"{member} = {self.c_name};",
+ return [f"{member} = {self.c_name};",
f"{presence} = n_{self.c_name};"]
@@ -696,8 +714,11 @@ class TypeArrayNest(Type):
def _attr_get(self, ri, var):
local_vars = ['const struct nlattr *attr2;']
get_lines = [f'attr_{self.c_name} = attr;',
- 'ynl_attr_for_each_nested(attr2, attr)',
- f'\t{var}->n_{self.c_name}++;']
+ 'ynl_attr_for_each_nested(attr2, attr) {',
+ '\tif (ynl_attr_validate(yarg, attr2))',
+ '\t\treturn YNL_PARSE_CB_ERROR;',
+ f'\t{var}->n_{self.c_name}++;',
+ '}']
return get_lines, None, local_vars
@@ -755,6 +776,7 @@ class Struct:
self.request = False
self.reply = False
self.recursive = False
+ self.in_multi_val = False # used by a MultiAttr or and legacy arrays
self.attr_list = []
self.attrs = dict()
@@ -1122,6 +1144,10 @@ class Family(SpecFamily):
if attr in rs_members['reply']:
self.pure_nested_structs[nested].reply = True
+ if spec.is_multi_val():
+ child = self.pure_nested_structs.get(nested)
+ child.in_multi_val = True
+
self._sort_pure_types()
# Propagate the request / reply / recursive
@@ -1136,6 +1162,8 @@ class Family(SpecFamily):
struct.child_nests.update(child.child_nests)
child.request |= struct.request
child.reply |= struct.reply
+ if spec.is_multi_val():
+ child.in_multi_val = True
if attr_set in struct.child_nests:
struct.recursive = True
@@ -1399,9 +1427,9 @@ class CodeWriter:
def write_func(self, qual_ret, name, body, args=None, local_vars=None):
self.write_func_prot(qual_ret=qual_ret, name=name, args=args)
+ self.block_start()
self.write_func_lvar(local_vars=local_vars)
- self.block_start()
for line in body:
self.p(line)
self.block_end()
@@ -1644,11 +1672,23 @@ def put_req_nested_prototype(ri, struct, suffix=';'):
def put_req_nested(ri, struct):
+ local_vars = []
+ init_lines = []
+
+ local_vars.append('struct nlattr *nest;')
+ init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);")
+
+ for _, arg in struct.member_list():
+ if arg.presence_type() == 'count':
+ local_vars.append('unsigned int i;')
+ break
+
put_req_nested_prototype(ri, struct, suffix='')
ri.cw.block_start()
- ri.cw.write_func_lvar('struct nlattr *nest;')
+ ri.cw.write_func_lvar(local_vars)
- ri.cw.p("nest = ynl_attr_nest_start(nlh, attr_type);")
+ for line in init_lines:
+ ri.cw.p(line)
for _, arg in struct.member_list():
arg.attr_put(ri, "obj")
@@ -1850,6 +1890,11 @@ def print_req(ri):
local_vars += ['size_t hdr_len;',
'void *hdr;']
+ for _, attr in ri.struct["request"].member_list():
+ if attr.presence_type() == 'count':
+ local_vars += ['unsigned int i;']
+ break
+
print_prototype(ri, direction, terminate=False)
ri.cw.block_start()
ri.cw.write_func_lvar(local_vars)
@@ -2941,6 +2986,9 @@ def main():
for attr_set, struct in parsed.pure_nested_structs.items():
ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set)
print_type_full(ri, struct)
+ if struct.request and struct.in_multi_val:
+ free_rsp_nested_prototype(ri)
+ cw.nl()
for op_name, op in parsed.ops.items():
cw.p(f"/* ============== {op.enum_name} ============== */")
diff --git a/tools/objtool/Documentation/objtool.txt b/tools/objtool/Documentation/objtool.txt
index 28ac57b9e102..9e97fc25b2d8 100644
--- a/tools/objtool/Documentation/objtool.txt
+++ b/tools/objtool/Documentation/objtool.txt
@@ -34,7 +34,7 @@ Objtool has the following features:
- Return thunk annotation -- annotates all return thunk sites so kernel
can patch them inline, depending on enabled mitigations
-- Return thunk training valiation -- validate that all entry paths
+- Return thunk untraining validation -- validate that all entry paths
untrain a "safe return" before the first return (or call)
- Non-instrumentation validation -- validates non-instrumentable
@@ -281,8 +281,8 @@ the objtool maintainers.
If the error is for an asm file, and func() is indeed a callable
function, add proper frame pointer logic using the FRAME_BEGIN and
FRAME_END macros. Otherwise, if it's not a callable function, remove
- its ELF function annotation by changing ENDPROC to END, and instead
- use the manual unwind hint macros in asm/unwind_hints.h.
+ its ELF function annotation by using SYM_CODE_{START,END} and use the
+ manual unwind hint macros in asm/unwind_hints.h.
If it's a GCC-compiled .c file, the error may be because the function
uses an inline asm() statement which has a "call" instruction. An
@@ -352,7 +352,7 @@ the objtool maintainers.
This is a kernel entry/exit instruction like sysenter or iret. Such
instructions aren't allowed in a callable function, and are most
likely part of the kernel entry code. Such code should probably be
- placed in a SYM_FUNC_CODE block with unwind hints.
+ placed in a SYM_CODE_{START,END} block with unwind hints.
6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame
@@ -381,7 +381,7 @@ the objtool maintainers.
Another possibility is that the code has some asm or inline asm which
does some unusual things to the stack or the frame pointer. In such
- cases it's probably appropriate to use SYM_FUNC_CODE with unwind
+ cases it's probably appropriate to use SYM_CODE_{START,END} with unwind
hints.
diff --git a/tools/objtool/arch/loongarch/decode.c b/tools/objtool/arch/loongarch/decode.c
index 02e490555966..b6fdc68053cc 100644
--- a/tools/objtool/arch/loongarch/decode.c
+++ b/tools/objtool/arch/loongarch/decode.c
@@ -63,7 +63,7 @@ static bool is_loongarch(const struct elf *elf)
if (elf->ehdr.e_machine == EM_LOONGARCH)
return true;
- WARN("unexpected ELF machine type %d", elf->ehdr.e_machine);
+ ERROR("unexpected ELF machine type %d", elf->ehdr.e_machine);
return false;
}
@@ -327,8 +327,10 @@ const char *arch_nop_insn(int len)
{
static u32 nop;
- if (len != LOONGARCH_INSN_SIZE)
- WARN("invalid NOP size: %d\n", len);
+ if (len != LOONGARCH_INSN_SIZE) {
+ ERROR("invalid NOP size: %d\n", len);
+ return NULL;
+ }
nop = LOONGARCH_INSN_NOP;
@@ -339,8 +341,10 @@ const char *arch_ret_insn(int len)
{
static u32 ret;
- if (len != LOONGARCH_INSN_SIZE)
- WARN("invalid RET size: %d\n", len);
+ if (len != LOONGARCH_INSN_SIZE) {
+ ERROR("invalid RET size: %d\n", len);
+ return NULL;
+ }
emit_jirl((union loongarch_instruction *)&ret, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0);
diff --git a/tools/objtool/arch/loongarch/orc.c b/tools/objtool/arch/loongarch/orc.c
index 873536d009d9..b58c5ff443c9 100644
--- a/tools/objtool/arch/loongarch/orc.c
+++ b/tools/objtool/arch/loongarch/orc.c
@@ -41,7 +41,7 @@ int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruct
orc->type = ORC_TYPE_REGS_PARTIAL;
break;
default:
- WARN_INSN(insn, "unknown unwind hint type %d", cfi->type);
+ ERROR_INSN(insn, "unknown unwind hint type %d", cfi->type);
return -1;
}
@@ -55,7 +55,7 @@ int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruct
orc->sp_reg = ORC_REG_FP;
break;
default:
- WARN_INSN(insn, "unknown CFA base reg %d", cfi->cfa.base);
+ ERROR_INSN(insn, "unknown CFA base reg %d", cfi->cfa.base);
return -1;
}
@@ -72,7 +72,7 @@ int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruct
orc->fp_reg = ORC_REG_FP;
break;
default:
- WARN_INSN(insn, "unknown FP base reg %d", fp->base);
+ ERROR_INSN(insn, "unknown FP base reg %d", fp->base);
return -1;
}
@@ -89,7 +89,7 @@ int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruct
orc->ra_reg = ORC_REG_FP;
break;
default:
- WARN_INSN(insn, "unknown RA base reg %d", ra->base);
+ ERROR_INSN(insn, "unknown RA base reg %d", ra->base);
return -1;
}
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 7567c893f45e..331b9a744410 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -36,7 +36,7 @@ static int is_x86_64(const struct elf *elf)
case EM_386:
return 0;
default:
- WARN("unexpected ELF machine type %d", elf->ehdr.e_machine);
+ ERROR("unexpected ELF machine type %d", elf->ehdr.e_machine);
return -1;
}
}
@@ -173,7 +173,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
ret = insn_decode(&ins, sec->data->d_buf + offset, maxlen,
x86_64 ? INSN_MODE_64 : INSN_MODE_32);
if (ret < 0) {
- WARN("can't decode instruction at %s:0x%lx", sec->name, offset);
+ ERROR("can't decode instruction at %s:0x%lx", sec->name, offset);
return -1;
}
@@ -321,7 +321,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
break;
default:
- /* WARN ? */
+ /* ERROR ? */
break;
}
@@ -522,7 +522,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
case INAT_PFX_REPNE:
if (modrm == 0xca)
/* eretu/erets */
- insn->type = INSN_CONTEXT_SWITCH;
+ insn->type = INSN_SYSRET;
break;
default:
if (modrm == 0xca)
@@ -535,11 +535,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
insn->type = INSN_JUMP_CONDITIONAL;
- } else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 ||
- op2 == 0x35) {
+ } else if (op2 == 0x05 || op2 == 0x34) {
- /* sysenter, sysret */
- insn->type = INSN_CONTEXT_SWITCH;
+ /* syscall, sysenter */
+ insn->type = INSN_SYSCALL;
+
+ } else if (op2 == 0x07 || op2 == 0x35) {
+
+ /* sysret, sysexit */
+ insn->type = INSN_SYSRET;
} else if (op2 == 0x0b || op2 == 0xb9) {
@@ -561,8 +565,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
if (ins.prefixes.nbytes == 1 &&
ins.prefixes.bytes[0] == 0xf2) {
/* ENQCMD cannot be used in the kernel. */
- WARN("ENQCMD instruction at %s:%lx", sec->name,
- offset);
+ WARN("ENQCMD instruction at %s:%lx", sec->name, offset);
}
} else if (op2 == 0xa0 || op2 == 0xa8) {
@@ -646,7 +649,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
if (disp->sym->type == STT_SECTION)
func = find_symbol_by_offset(disp->sym->sec, reloc_addend(disp));
if (!func) {
- WARN("no func for pv_ops[]");
+ ERROR("no func for pv_ops[]");
return -1;
}
@@ -677,7 +680,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
case 0xca: /* retf */
case 0xcb: /* retf */
- insn->type = INSN_CONTEXT_SWITCH;
+ insn->type = INSN_SYSRET;
break;
case 0xe0: /* loopne */
@@ -722,7 +725,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
} else if (modrm_reg == 5) {
/* jmpf */
- insn->type = INSN_CONTEXT_SWITCH;
+ insn->type = INSN_SYSRET;
} else if (modrm_reg == 6) {
@@ -776,7 +779,7 @@ const char *arch_nop_insn(int len)
};
if (len < 1 || len > 5) {
- WARN("invalid NOP size: %d\n", len);
+ ERROR("invalid NOP size: %d\n", len);
return NULL;
}
@@ -796,7 +799,7 @@ const char *arch_ret_insn(int len)
};
if (len < 1 || len > 5) {
- WARN("invalid RET size: %d\n", len);
+ ERROR("invalid RET size: %d\n", len);
return NULL;
}
@@ -839,12 +842,14 @@ int arch_decode_hint_reg(u8 sp_reg, int *base)
bool arch_is_retpoline(struct symbol *sym)
{
- return !strncmp(sym->name, "__x86_indirect_", 15);
+ return !strncmp(sym->name, "__x86_indirect_", 15) ||
+ !strncmp(sym->name, "__pi___x86_indirect_", 20);
}
bool arch_is_rethunk(struct symbol *sym)
{
- return !strcmp(sym->name, "__x86_return_thunk");
+ return !strcmp(sym->name, "__x86_return_thunk") ||
+ !strcmp(sym->name, "__pi___x86_return_thunk");
}
bool arch_is_embedded_insn(struct symbol *sym)
diff --git a/tools/objtool/arch/x86/orc.c b/tools/objtool/arch/x86/orc.c
index b6cd943e87f9..7176b9ec5b05 100644
--- a/tools/objtool/arch/x86/orc.c
+++ b/tools/objtool/arch/x86/orc.c
@@ -40,7 +40,7 @@ int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruct
orc->type = ORC_TYPE_REGS_PARTIAL;
break;
default:
- WARN_INSN(insn, "unknown unwind hint type %d", cfi->type);
+ ERROR_INSN(insn, "unknown unwind hint type %d", cfi->type);
return -1;
}
@@ -72,7 +72,7 @@ int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruct
orc->sp_reg = ORC_REG_DX;
break;
default:
- WARN_INSN(insn, "unknown CFA base reg %d", cfi->cfa.base);
+ ERROR_INSN(insn, "unknown CFA base reg %d", cfi->cfa.base);
return -1;
}
@@ -87,7 +87,7 @@ int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruct
orc->bp_reg = ORC_REG_BP;
break;
default:
- WARN_INSN(insn, "unknown BP base reg %d", bp->base);
+ ERROR_INSN(insn, "unknown BP base reg %d", bp->base);
return -1;
}
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
index 9c1c9df09aaa..06ca4a2659a4 100644
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -3,11 +3,9 @@
#include <objtool/special.h>
#include <objtool/builtin.h>
+#include <objtool/warn.h>
-#define X86_FEATURE_POPCNT (4 * 32 + 23)
-#define X86_FEATURE_SMAP (9 * 32 + 20)
-
-void arch_handle_alternative(unsigned short feature, struct special_alt *alt)
+void arch_handle_alternative(struct special_alt *alt)
{
static struct special_alt *group, *prev;
@@ -31,34 +29,6 @@ void arch_handle_alternative(unsigned short feature, struct special_alt *alt)
} else group = alt;
prev = alt;
-
- switch (feature) {
- case X86_FEATURE_SMAP:
- /*
- * If UACCESS validation is enabled; force that alternative;
- * otherwise force it the other way.
- *
- * What we want to avoid is having both the original and the
- * alternative code flow at the same time, in that case we can
- * find paths that see the STAC but take the NOP instead of
- * CLAC and the other way around.
- */
- if (opts.uaccess)
- alt->skip_orig = true;
- else
- alt->skip_alt = true;
- break;
- case X86_FEATURE_POPCNT:
- /*
- * It has been requested that we don't validate the !POPCNT
- * feature path which is a "very very small percentage of
- * machines".
- */
- alt->skip_orig = true;
- break;
- default:
- break;
- }
}
bool arch_support_alt_relocation(struct special_alt *special_alt,
@@ -156,8 +126,10 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
* indicates a rare GCC quirk/bug which can leave dead
* code behind.
*/
- if (reloc_type(text_reloc) == R_X86_64_PC32)
+ if (!file->ignore_unreachables && reloc_type(text_reloc) == R_X86_64_PC32) {
+ WARN_INSN(insn, "ignoring unreachables due to jump table quirk");
file->ignore_unreachables = true;
+ }
*table_size = 0;
return rodata_reloc;
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 5f761f420b8c..80239843e9f0 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -8,18 +8,18 @@
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
+#include <errno.h>
#include <sys/stat.h>
#include <sys/sendfile.h>
#include <objtool/builtin.h>
#include <objtool/objtool.h>
+#include <objtool/warn.h>
-#define ERROR(format, ...) \
- fprintf(stderr, \
- "error: objtool: " format "\n", \
- ##__VA_ARGS__)
+#define ORIG_SUFFIX ".orig"
+int orig_argc;
+static char **orig_argv;
const char *objname;
-
struct opts opts;
static const char * const check_usage[] = {
@@ -194,30 +194,30 @@ static int copy_file(const char *src, const char *dst)
src_fd = open(src, O_RDONLY);
if (src_fd == -1) {
- ERROR("can't open '%s' for reading", src);
+ ERROR("can't open %s for reading: %s", src, strerror(errno));
return 1;
}
dst_fd = open(dst, O_WRONLY | O_CREAT | O_TRUNC, 0400);
if (dst_fd == -1) {
- ERROR("can't open '%s' for writing", dst);
+ ERROR("can't open %s for writing: %s", dst, strerror(errno));
return 1;
}
if (fstat(src_fd, &stat) == -1) {
- perror("fstat");
+ ERROR_GLIBC("fstat");
return 1;
}
if (fchmod(dst_fd, stat.st_mode) == -1) {
- perror("fchmod");
+ ERROR_GLIBC("fchmod");
return 1;
}
for (to_copy = stat.st_size; to_copy > 0; to_copy -= copied) {
copied = sendfile(dst_fd, src_fd, &offset, to_copy);
if (copied == -1) {
- perror("sendfile");
+ ERROR_GLIBC("sendfile");
return 1;
}
}
@@ -227,39 +227,73 @@ static int copy_file(const char *src, const char *dst)
return 0;
}
-static char **save_argv(int argc, const char **argv)
+static void save_argv(int argc, const char **argv)
{
- char **orig_argv;
-
orig_argv = calloc(argc, sizeof(char *));
if (!orig_argv) {
- perror("calloc");
- return NULL;
+ ERROR_GLIBC("calloc");
+ exit(1);
}
for (int i = 0; i < argc; i++) {
orig_argv[i] = strdup(argv[i]);
if (!orig_argv[i]) {
- perror("strdup");
- return NULL;
+ ERROR_GLIBC("strdup(%s)", argv[i]);
+ exit(1);
}
};
-
- return orig_argv;
}
-#define ORIG_SUFFIX ".orig"
+void print_args(void)
+{
+ char *backup = NULL;
+
+ if (opts.output || opts.dryrun)
+ goto print;
+
+ /*
+ * Make a backup before kbuild deletes the file so the error
+ * can be recreated without recompiling or relinking.
+ */
+ backup = malloc(strlen(objname) + strlen(ORIG_SUFFIX) + 1);
+ if (!backup) {
+ ERROR_GLIBC("malloc");
+ goto print;
+ }
+
+ strcpy(backup, objname);
+ strcat(backup, ORIG_SUFFIX);
+ if (copy_file(objname, backup)) {
+ backup = NULL;
+ goto print;
+ }
+
+print:
+ /*
+ * Print the cmdline args to make it easier to recreate. If '--output'
+ * wasn't used, add it to the printed args with the backup as input.
+ */
+ fprintf(stderr, "%s", orig_argv[0]);
+
+ for (int i = 1; i < orig_argc; i++) {
+ char *arg = orig_argv[i];
+
+ if (backup && !strcmp(arg, objname))
+ fprintf(stderr, " %s -o %s", backup, objname);
+ else
+ fprintf(stderr, " %s", arg);
+ }
+
+ fprintf(stderr, "\n");
+}
int objtool_run(int argc, const char **argv)
{
struct objtool_file *file;
- char *backup = NULL;
- char **orig_argv;
int ret = 0;
- orig_argv = save_argv(argc, argv);
- if (!orig_argv)
- return 1;
+ orig_argc = argc;
+ save_argv(argc, argv);
cmd_parse_options(argc, argv, check_usage);
@@ -282,59 +316,19 @@ int objtool_run(int argc, const char **argv)
file = objtool_open_read(objname);
if (!file)
- goto err;
+ return 1;
if (!opts.link && has_multiple_files(file->elf)) {
ERROR("Linked object requires --link");
- goto err;
+ return 1;
}
ret = check(file);
if (ret)
- goto err;
+ return ret;
if (!opts.dryrun && file->elf->changed && elf_write(file->elf))
- goto err;
-
- return 0;
-
-err:
- if (opts.dryrun)
- goto err_msg;
-
- if (opts.output) {
- unlink(opts.output);
- goto err_msg;
- }
-
- /*
- * Make a backup before kbuild deletes the file so the error
- * can be recreated without recompiling or relinking.
- */
- backup = malloc(strlen(objname) + strlen(ORIG_SUFFIX) + 1);
- if (!backup) {
- perror("malloc");
- return 1;
- }
-
- strcpy(backup, objname);
- strcat(backup, ORIG_SUFFIX);
- if (copy_file(objname, backup))
return 1;
-err_msg:
- fprintf(stderr, "%s", orig_argv[0]);
-
- for (int i = 1; i < argc; i++) {
- char *arg = orig_argv[i];
-
- if (backup && !strcmp(arg, objname))
- fprintf(stderr, " %s -o %s", backup, objname);
- else
- fprintf(stderr, " %s", arg);
- }
-
- fprintf(stderr, "\n");
-
- return 1;
+ return 0;
}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index ca3435acc326..b21b12ec88d9 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -25,7 +25,6 @@
struct alternative {
struct alternative *next;
struct instruction *insn;
- bool skip_orig;
};
static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache;
@@ -226,7 +225,9 @@ static bool is_rust_noreturn(const struct symbol *func)
str_ends_with(func->name, "_4core9panicking14panic_nounwind") ||
str_ends_with(func->name, "_4core9panicking18panic_bounds_check") ||
str_ends_with(func->name, "_4core9panicking19assert_failed_inner") ||
+ str_ends_with(func->name, "_4core9panicking30panic_null_pointer_dereference") ||
str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") ||
+ str_ends_with(func->name, "_7___rustc17rust_begin_unwind") ||
strstr(func->name, "_4core9panicking13assert_failed") ||
strstr(func->name, "_4core9panicking11panic_const24panic_const_") ||
(strstr(func->name, "_4core5slice5index24slice_") &&
@@ -341,12 +342,7 @@ static void init_insn_state(struct objtool_file *file, struct insn_state *state,
memset(state, 0, sizeof(*state));
init_cfi_state(&state->cfi);
- /*
- * We need the full vmlinux for noinstr validation, otherwise we can
- * not correctly determine insn_call_dest(insn)->sec (external symbols
- * do not have a section).
- */
- if (opts.link && opts.noinstr && sec)
+ if (opts.noinstr && sec)
state->noinstr = sec->noinstr;
}
@@ -354,7 +350,7 @@ static struct cfi_state *cfi_alloc(void)
{
struct cfi_state *cfi = calloc(1, sizeof(struct cfi_state));
if (!cfi) {
- WARN("calloc failed");
+ ERROR_GLIBC("calloc");
exit(1);
}
nr_cfi++;
@@ -410,7 +406,7 @@ static void *cfi_hash_alloc(unsigned long size)
PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANON, -1, 0);
if (cfi_hash == (void *)-1L) {
- WARN("mmap fail cfi_hash");
+ ERROR_GLIBC("mmap fail cfi_hash");
cfi_hash = NULL;
} else if (opts.stats) {
printf("cfi_bits: %d\n", cfi_bits);
@@ -466,7 +462,7 @@ static int decode_instructions(struct objtool_file *file)
if (!insns || idx == INSN_CHUNK_MAX) {
insns = calloc(sizeof(*insn), INSN_CHUNK_SIZE);
if (!insns) {
- WARN("malloc failed");
+ ERROR_GLIBC("calloc");
return -1;
}
idx = 0;
@@ -501,8 +497,6 @@ static int decode_instructions(struct objtool_file *file)
nr_insns++;
}
-// printf("%s: last chunk used: %d\n", sec->name, (int)idx);
-
sec_for_each_sym(sec, func) {
if (func->type != STT_NOTYPE && func->type != STT_FUNC)
continue;
@@ -511,8 +505,7 @@ static int decode_instructions(struct objtool_file *file)
/* Heuristic: likely an "end" symbol */
if (func->type == STT_NOTYPE)
continue;
- WARN("%s(): STT_FUNC at end of section",
- func->name);
+ ERROR("%s(): STT_FUNC at end of section", func->name);
return -1;
}
@@ -520,8 +513,7 @@ static int decode_instructions(struct objtool_file *file)
continue;
if (!find_insn(file, sec, func->offset)) {
- WARN("%s(): can't find starting instruction",
- func->name);
+ ERROR("%s(): can't find starting instruction", func->name);
return -1;
}
@@ -568,14 +560,20 @@ static int add_pv_ops(struct objtool_file *file, const char *symname)
if (!reloc)
break;
+ idx = (reloc_offset(reloc) - sym->offset) / sizeof(unsigned long);
+
func = reloc->sym;
if (func->type == STT_SECTION)
func = find_symbol_by_offset(reloc->sym->sec,
reloc_addend(reloc));
+ if (!func) {
+ ERROR_FUNC(reloc->sym->sec, reloc_addend(reloc),
+ "can't find func at %s[%d]", symname, idx);
+ return -1;
+ }
- idx = (reloc_offset(reloc) - sym->offset) / sizeof(unsigned long);
-
- objtool_pv_add(file, idx, func);
+ if (objtool_pv_add(file, idx, func))
+ return -1;
off = reloc_offset(reloc) + 1;
if (off > end)
@@ -599,7 +597,7 @@ static int init_pv_ops(struct objtool_file *file)
};
const char *pv_ops;
struct symbol *sym;
- int idx, nr;
+ int idx, nr, ret;
if (!opts.noinstr)
return 0;
@@ -612,14 +610,19 @@ static int init_pv_ops(struct objtool_file *file)
nr = sym->len / sizeof(unsigned long);
file->pv_ops = calloc(sizeof(struct pv_state), nr);
- if (!file->pv_ops)
+ if (!file->pv_ops) {
+ ERROR_GLIBC("calloc");
return -1;
+ }
for (idx = 0; idx < nr; idx++)
INIT_LIST_HEAD(&file->pv_ops[idx].targets);
- for (idx = 0; (pv_ops = pv_ops_tables[idx]); idx++)
- add_pv_ops(file, pv_ops);
+ for (idx = 0; (pv_ops = pv_ops_tables[idx]); idx++) {
+ ret = add_pv_ops(file, pv_ops);
+ if (ret)
+ return ret;
+ }
return 0;
}
@@ -667,13 +670,12 @@ static int create_static_call_sections(struct objtool_file *file)
/* find key symbol */
key_name = strdup(insn_call_dest(insn)->name);
if (!key_name) {
- perror("strdup");
+ ERROR_GLIBC("strdup");
return -1;
}
if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR,
STATIC_CALL_TRAMP_PREFIX_LEN)) {
- WARN("static_call: trampoline name malformed: %s", key_name);
- free(key_name);
+ ERROR("static_call: trampoline name malformed: %s", key_name);
return -1;
}
tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN;
@@ -682,8 +684,7 @@ static int create_static_call_sections(struct objtool_file *file)
key_sym = find_symbol_by_name(file->elf, tmp);
if (!key_sym) {
if (!opts.module) {
- WARN("static_call: can't find static_call_key symbol: %s", tmp);
- free(key_name);
+ ERROR("static_call: can't find static_call_key symbol: %s", tmp);
return -1;
}
@@ -698,7 +699,6 @@ static int create_static_call_sections(struct objtool_file *file)
*/
key_sym = insn_call_dest(insn);
}
- free(key_name);
/* populate reloc for 'key' */
if (!elf_init_reloc_data_sym(file->elf, sec,
@@ -829,8 +829,11 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file)
if (opts.module && sym && sym->type == STT_FUNC &&
insn->offset == sym->offset &&
(!strcmp(sym->name, "init_module") ||
- !strcmp(sym->name, "cleanup_module")))
- WARN("%s(): not an indirect call target", sym->name);
+ !strcmp(sym->name, "cleanup_module"))) {
+ ERROR("%s(): Magic init_module() function name is deprecated, use module_init(fn) instead",
+ sym->name);
+ return -1;
+ }
if (!elf_init_reloc_text_sym(file->elf, sec,
idx * sizeof(int), idx,
@@ -979,16 +982,15 @@ static int create_direct_call_sections(struct objtool_file *file)
/*
* Warnings shouldn't be reported for ignored functions.
*/
-static void add_ignores(struct objtool_file *file)
+static int add_ignores(struct objtool_file *file)
{
- struct instruction *insn;
struct section *rsec;
struct symbol *func;
struct reloc *reloc;
rsec = find_section_by_name(file->elf, ".rela.discard.func_stack_frame_non_standard");
if (!rsec)
- return;
+ return 0;
for_each_reloc(rsec, reloc) {
switch (reloc->sym->type) {
@@ -1003,14 +1005,17 @@ static void add_ignores(struct objtool_file *file)
break;
default:
- WARN("unexpected relocation symbol type in %s: %d",
- rsec->name, reloc->sym->type);
- continue;
+ ERROR("unexpected relocation symbol type in %s: %d",
+ rsec->name, reloc->sym->type);
+ return -1;
}
- func_for_each_insn(file, func, insn)
- insn->ignore = true;
+ func->ignore = true;
+ if (func->cfunc)
+ func->cfunc->ignore = true;
}
+
+ return 0;
}
/*
@@ -1188,12 +1193,15 @@ static const char *uaccess_safe_builtin[] = {
"__ubsan_handle_load_invalid_value",
/* STACKLEAK */
"stackleak_track_stack",
+ /* TRACE_BRANCH_PROFILING */
+ "ftrace_likely_update",
+ /* STACKPROTECTOR */
+ "__stack_chk_fail",
/* misc */
"csum_partial_copy_generic",
"copy_mc_fragile",
"copy_mc_fragile_handle_tail",
"copy_mc_enhanced_fast_string",
- "ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
"rep_stos_alternative",
"rep_movs_alternative",
"__copy_user_nocache",
@@ -1275,7 +1283,7 @@ static void remove_insn_ops(struct instruction *insn)
insn->stack_ops = NULL;
}
-static void annotate_call_site(struct objtool_file *file,
+static int annotate_call_site(struct objtool_file *file,
struct instruction *insn, bool sibling)
{
struct reloc *reloc = insn_reloc(file, insn);
@@ -1286,12 +1294,12 @@ static void annotate_call_site(struct objtool_file *file,
if (sym->static_call_tramp) {
list_add_tail(&insn->call_node, &file->static_call_list);
- return;
+ return 0;
}
if (sym->retpoline_thunk) {
list_add_tail(&insn->call_node, &file->retpoline_call_list);
- return;
+ return 0;
}
/*
@@ -1303,10 +1311,12 @@ static void annotate_call_site(struct objtool_file *file,
if (reloc)
set_reloc_type(file->elf, reloc, R_NONE);
- elf_write_insn(file->elf, insn->sec,
- insn->offset, insn->len,
- sibling ? arch_ret_insn(insn->len)
- : arch_nop_insn(insn->len));
+ if (elf_write_insn(file->elf, insn->sec,
+ insn->offset, insn->len,
+ sibling ? arch_ret_insn(insn->len)
+ : arch_nop_insn(insn->len))) {
+ return -1;
+ }
insn->type = sibling ? INSN_RETURN : INSN_NOP;
@@ -1320,7 +1330,7 @@ static void annotate_call_site(struct objtool_file *file,
insn->retpoline_safe = true;
}
- return;
+ return 0;
}
if (opts.mcount && sym->fentry) {
@@ -1330,15 +1340,17 @@ static void annotate_call_site(struct objtool_file *file,
if (reloc)
set_reloc_type(file->elf, reloc, R_NONE);
- elf_write_insn(file->elf, insn->sec,
- insn->offset, insn->len,
- arch_nop_insn(insn->len));
+ if (elf_write_insn(file->elf, insn->sec,
+ insn->offset, insn->len,
+ arch_nop_insn(insn->len))) {
+ return -1;
+ }
insn->type = INSN_NOP;
}
list_add_tail(&insn->call_node, &file->mcount_loc_list);
- return;
+ return 0;
}
if (insn->type == INSN_CALL && !insn->sec->init &&
@@ -1347,14 +1359,16 @@ static void annotate_call_site(struct objtool_file *file,
if (!sibling && dead_end_function(file, sym))
insn->dead_end = true;
+
+ return 0;
}
-static void add_call_dest(struct objtool_file *file, struct instruction *insn,
+static int add_call_dest(struct objtool_file *file, struct instruction *insn,
struct symbol *dest, bool sibling)
{
insn->_call_dest = dest;
if (!dest)
- return;
+ return 0;
/*
* Whatever stack impact regular CALLs have, should be undone
@@ -1365,10 +1379,10 @@ static void add_call_dest(struct objtool_file *file, struct instruction *insn,
*/
remove_insn_ops(insn);
- annotate_call_site(file, insn, sibling);
+ return annotate_call_site(file, insn, sibling);
}
-static void add_retpoline_call(struct objtool_file *file, struct instruction *insn)
+static int add_retpoline_call(struct objtool_file *file, struct instruction *insn)
{
/*
* Retpoline calls/jumps are really dynamic calls/jumps in disguise,
@@ -1385,7 +1399,7 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in
insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
break;
default:
- return;
+ return 0;
}
insn->retpoline_safe = true;
@@ -1399,7 +1413,7 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in
*/
remove_insn_ops(insn);
- annotate_call_site(file, insn, false);
+ return annotate_call_site(file, insn, false);
}
static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add)
@@ -1468,8 +1482,11 @@ static int add_jump_destinations(struct objtool_file *file)
struct reloc *reloc;
struct section *dest_sec;
unsigned long dest_off;
+ int ret;
for_each_insn(file, insn) {
+ struct symbol *func = insn_func(insn);
+
if (insn->jump_dest) {
/*
* handle_group_alt() may have previously set
@@ -1488,17 +1505,21 @@ static int add_jump_destinations(struct objtool_file *file)
dest_sec = reloc->sym->sec;
dest_off = arch_dest_reloc_offset(reloc_addend(reloc));
} else if (reloc->sym->retpoline_thunk) {
- add_retpoline_call(file, insn);
+ ret = add_retpoline_call(file, insn);
+ if (ret)
+ return ret;
continue;
} else if (reloc->sym->return_thunk) {
add_return_call(file, insn, true);
continue;
- } else if (insn_func(insn)) {
+ } else if (func) {
/*
* External sibling call or internal sibling call with
* STT_FUNC reloc.
*/
- add_call_dest(file, insn, reloc->sym, true);
+ ret = add_call_dest(file, insn, reloc->sym, true);
+ if (ret)
+ return ret;
continue;
} else if (reloc->sym->sec->idx) {
dest_sec = reloc->sym->sec;
@@ -1526,8 +1547,17 @@ static int add_jump_destinations(struct objtool_file *file)
continue;
}
- WARN_INSN(insn, "can't find jump dest instruction at %s+0x%lx",
- dest_sec->name, dest_off);
+ /*
+ * GCOV/KCOV dead code can jump to the end of the
+ * function/section.
+ */
+ if (file->ignore_unreachables && func &&
+ dest_sec == insn->sec &&
+ dest_off == func->offset + func->len)
+ continue;
+
+ ERROR_INSN(insn, "can't find jump dest instruction at %s+0x%lx",
+ dest_sec->name, dest_off);
return -1;
}
@@ -1538,7 +1568,9 @@ static int add_jump_destinations(struct objtool_file *file)
*/
if (jump_dest->sym && jump_dest->offset == jump_dest->sym->offset) {
if (jump_dest->sym->retpoline_thunk) {
- add_retpoline_call(file, insn);
+ ret = add_retpoline_call(file, insn);
+ if (ret)
+ return ret;
continue;
}
if (jump_dest->sym->return_thunk) {
@@ -1550,8 +1582,7 @@ static int add_jump_destinations(struct objtool_file *file)
/*
* Cross-function jump.
*/
- if (insn_func(insn) && insn_func(jump_dest) &&
- insn_func(insn) != insn_func(jump_dest)) {
+ if (func && insn_func(jump_dest) && func != insn_func(jump_dest)) {
/*
* For GCC 8+, create parent/child links for any cold
@@ -1568,10 +1599,10 @@ static int add_jump_destinations(struct objtool_file *file)
* case where the parent function's only reference to a
* subfunction is through a jump table.
*/
- if (!strstr(insn_func(insn)->name, ".cold") &&
+ if (!strstr(func->name, ".cold") &&
strstr(insn_func(jump_dest)->name, ".cold")) {
- insn_func(insn)->cfunc = insn_func(jump_dest);
- insn_func(jump_dest)->pfunc = insn_func(insn);
+ func->cfunc = insn_func(jump_dest);
+ insn_func(jump_dest)->pfunc = func;
}
}
@@ -1580,7 +1611,9 @@ static int add_jump_destinations(struct objtool_file *file)
* Internal sibling call without reloc or with
* STT_SECTION reloc.
*/
- add_call_dest(file, insn, insn_func(jump_dest), true);
+ ret = add_call_dest(file, insn, insn_func(jump_dest), true);
+ if (ret)
+ return ret;
continue;
}
@@ -1610,8 +1643,10 @@ static int add_call_destinations(struct objtool_file *file)
unsigned long dest_off;
struct symbol *dest;
struct reloc *reloc;
+ int ret;
for_each_insn(file, insn) {
+ struct symbol *func = insn_func(insn);
if (insn->type != INSN_CALL)
continue;
@@ -1620,18 +1655,20 @@ static int add_call_destinations(struct objtool_file *file)
dest_off = arch_jump_destination(insn);
dest = find_call_destination(insn->sec, dest_off);
- add_call_dest(file, insn, dest, false);
+ ret = add_call_dest(file, insn, dest, false);
+ if (ret)
+ return ret;
- if (insn->ignore)
+ if (func && func->ignore)
continue;
if (!insn_call_dest(insn)) {
- WARN_INSN(insn, "unannotated intra-function call");
+ ERROR_INSN(insn, "unannotated intra-function call");
return -1;
}
- if (insn_func(insn) && insn_call_dest(insn)->type != STT_FUNC) {
- WARN_INSN(insn, "unsupported call to non-function");
+ if (func && insn_call_dest(insn)->type != STT_FUNC) {
+ ERROR_INSN(insn, "unsupported call to non-function");
return -1;
}
@@ -1639,18 +1676,25 @@ static int add_call_destinations(struct objtool_file *file)
dest_off = arch_dest_reloc_offset(reloc_addend(reloc));
dest = find_call_destination(reloc->sym->sec, dest_off);
if (!dest) {
- WARN_INSN(insn, "can't find call dest symbol at %s+0x%lx",
- reloc->sym->sec->name, dest_off);
+ ERROR_INSN(insn, "can't find call dest symbol at %s+0x%lx",
+ reloc->sym->sec->name, dest_off);
return -1;
}
- add_call_dest(file, insn, dest, false);
+ ret = add_call_dest(file, insn, dest, false);
+ if (ret)
+ return ret;
} else if (reloc->sym->retpoline_thunk) {
- add_retpoline_call(file, insn);
+ ret = add_retpoline_call(file, insn);
+ if (ret)
+ return ret;
- } else
- add_call_dest(file, insn, reloc->sym, false);
+ } else {
+ ret = add_call_dest(file, insn, reloc->sym, false);
+ if (ret)
+ return ret;
+ }
}
return 0;
@@ -1673,15 +1717,15 @@ static int handle_group_alt(struct objtool_file *file,
if (!orig_alt_group) {
struct instruction *last_orig_insn = NULL;
- orig_alt_group = malloc(sizeof(*orig_alt_group));
+ orig_alt_group = calloc(1, sizeof(*orig_alt_group));
if (!orig_alt_group) {
- WARN("malloc failed");
+ ERROR_GLIBC("calloc");
return -1;
}
orig_alt_group->cfi = calloc(special_alt->orig_len,
sizeof(struct cfi_state *));
if (!orig_alt_group->cfi) {
- WARN("calloc failed");
+ ERROR_GLIBC("calloc");
return -1;
}
@@ -1697,21 +1741,22 @@ static int handle_group_alt(struct objtool_file *file,
orig_alt_group->first_insn = orig_insn;
orig_alt_group->last_insn = last_orig_insn;
orig_alt_group->nop = NULL;
+ orig_alt_group->ignore = orig_insn->ignore_alts;
} else {
if (orig_alt_group->last_insn->offset + orig_alt_group->last_insn->len -
orig_alt_group->first_insn->offset != special_alt->orig_len) {
- WARN_INSN(orig_insn, "weirdly overlapping alternative! %ld != %d",
- orig_alt_group->last_insn->offset +
- orig_alt_group->last_insn->len -
- orig_alt_group->first_insn->offset,
- special_alt->orig_len);
+ ERROR_INSN(orig_insn, "weirdly overlapping alternative! %ld != %d",
+ orig_alt_group->last_insn->offset +
+ orig_alt_group->last_insn->len -
+ orig_alt_group->first_insn->offset,
+ special_alt->orig_len);
return -1;
}
}
- new_alt_group = malloc(sizeof(*new_alt_group));
+ new_alt_group = calloc(1, sizeof(*new_alt_group));
if (!new_alt_group) {
- WARN("malloc failed");
+ ERROR_GLIBC("calloc");
return -1;
}
@@ -1723,9 +1768,9 @@ static int handle_group_alt(struct objtool_file *file,
* instruction affects the stack, the instruction after it (the
* nop) will propagate the new state to the shared CFI array.
*/
- nop = malloc(sizeof(*nop));
+ nop = calloc(1, sizeof(*nop));
if (!nop) {
- WARN("malloc failed");
+ ERROR_GLIBC("calloc");
return -1;
}
memset(nop, 0, sizeof(*nop));
@@ -1736,7 +1781,6 @@ static int handle_group_alt(struct objtool_file *file,
nop->type = INSN_NOP;
nop->sym = orig_insn->sym;
nop->alt_group = new_alt_group;
- nop->ignore = orig_insn->ignore_alts;
}
if (!special_alt->new_len) {
@@ -1753,7 +1797,6 @@ static int handle_group_alt(struct objtool_file *file,
last_new_insn = insn;
- insn->ignore = orig_insn->ignore_alts;
insn->sym = orig_insn->sym;
insn->alt_group = new_alt_group;
@@ -1769,7 +1812,7 @@ static int handle_group_alt(struct objtool_file *file,
if (alt_reloc && arch_pc_relative_reloc(alt_reloc) &&
!arch_support_alt_relocation(special_alt, insn, alt_reloc)) {
- WARN_INSN(insn, "unsupported relocation in alternatives section");
+ ERROR_INSN(insn, "unsupported relocation in alternatives section");
return -1;
}
@@ -1783,15 +1826,15 @@ static int handle_group_alt(struct objtool_file *file,
if (dest_off == special_alt->new_off + special_alt->new_len) {
insn->jump_dest = next_insn_same_sec(file, orig_alt_group->last_insn);
if (!insn->jump_dest) {
- WARN_INSN(insn, "can't find alternative jump destination");
+ ERROR_INSN(insn, "can't find alternative jump destination");
return -1;
}
}
}
if (!last_new_insn) {
- WARN_FUNC("can't find last new alternative instruction",
- special_alt->new_sec, special_alt->new_off);
+ ERROR_FUNC(special_alt->new_sec, special_alt->new_off,
+ "can't find last new alternative instruction");
return -1;
}
@@ -1800,6 +1843,7 @@ end:
new_alt_group->first_insn = *new_insn;
new_alt_group->last_insn = last_new_insn;
new_alt_group->nop = nop;
+ new_alt_group->ignore = (*new_insn)->ignore_alts;
new_alt_group->cfi = orig_alt_group->cfi;
return 0;
}
@@ -1817,7 +1861,7 @@ static int handle_jump_alt(struct objtool_file *file,
if (orig_insn->type != INSN_JUMP_UNCONDITIONAL &&
orig_insn->type != INSN_NOP) {
- WARN_INSN(orig_insn, "unsupported instruction at jump label");
+ ERROR_INSN(orig_insn, "unsupported instruction at jump label");
return -1;
}
@@ -1826,9 +1870,13 @@ static int handle_jump_alt(struct objtool_file *file,
if (reloc)
set_reloc_type(file->elf, reloc, R_NONE);
- elf_write_insn(file->elf, orig_insn->sec,
- orig_insn->offset, orig_insn->len,
- arch_nop_insn(orig_insn->len));
+
+ if (elf_write_insn(file->elf, orig_insn->sec,
+ orig_insn->offset, orig_insn->len,
+ arch_nop_insn(orig_insn->len))) {
+ return -1;
+ }
+
orig_insn->type = INSN_NOP;
}
@@ -1864,19 +1912,17 @@ static int add_special_section_alts(struct objtool_file *file)
struct alternative *alt;
int ret;
- ret = special_get_alts(file->elf, &special_alts);
- if (ret)
- return ret;
+ if (special_get_alts(file->elf, &special_alts))
+ return -1;
list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
orig_insn = find_insn(file, special_alt->orig_sec,
special_alt->orig_off);
if (!orig_insn) {
- WARN_FUNC("special: can't find orig instruction",
- special_alt->orig_sec, special_alt->orig_off);
- ret = -1;
- goto out;
+ ERROR_FUNC(special_alt->orig_sec, special_alt->orig_off,
+ "special: can't find orig instruction");
+ return -1;
}
new_insn = NULL;
@@ -1884,41 +1930,37 @@ static int add_special_section_alts(struct objtool_file *file)
new_insn = find_insn(file, special_alt->new_sec,
special_alt->new_off);
if (!new_insn) {
- WARN_FUNC("special: can't find new instruction",
- special_alt->new_sec,
- special_alt->new_off);
- ret = -1;
- goto out;
+ ERROR_FUNC(special_alt->new_sec, special_alt->new_off,
+ "special: can't find new instruction");
+ return -1;
}
}
if (special_alt->group) {
if (!special_alt->orig_len) {
- WARN_INSN(orig_insn, "empty alternative entry");
+ ERROR_INSN(orig_insn, "empty alternative entry");
continue;
}
ret = handle_group_alt(file, special_alt, orig_insn,
&new_insn);
if (ret)
- goto out;
+ return ret;
+
} else if (special_alt->jump_or_nop) {
ret = handle_jump_alt(file, special_alt, orig_insn,
&new_insn);
if (ret)
- goto out;
+ return ret;
}
- alt = malloc(sizeof(*alt));
+ alt = calloc(1, sizeof(*alt));
if (!alt) {
- WARN("malloc failed");
- ret = -1;
- goto out;
+ ERROR_GLIBC("calloc");
+ return -1;
}
alt->insn = new_insn;
- alt->skip_orig = special_alt->skip_orig;
- orig_insn->ignore_alts |= special_alt->skip_alt;
alt->next = orig_insn->alts;
orig_insn->alts = alt;
@@ -1932,8 +1974,7 @@ static int add_special_section_alts(struct objtool_file *file)
printf("long:\t%ld\t%ld\n", file->jl_nop_long, file->jl_long);
}
-out:
- return ret;
+ return 0;
}
__weak unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table)
@@ -1941,8 +1982,7 @@ __weak unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct relo
return reloc->sym->offset + reloc_addend(reloc);
}
-static int add_jump_table(struct objtool_file *file, struct instruction *insn,
- struct reloc *next_table)
+static int add_jump_table(struct objtool_file *file, struct instruction *insn)
{
unsigned long table_size = insn_jump_table_size(insn);
struct symbol *pfunc = insn_func(insn)->pfunc;
@@ -1962,7 +2002,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
/* Check for the end of the table: */
if (table_size && reloc_offset(reloc) - reloc_offset(table) >= table_size)
break;
- if (reloc != table && reloc == next_table)
+ if (reloc != table && is_jump_table(reloc))
break;
/* Make sure the table entries are consecutive: */
@@ -1991,9 +2031,9 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
if (!insn_func(dest_insn) || insn_func(dest_insn)->pfunc != pfunc)
break;
- alt = malloc(sizeof(*alt));
+ alt = calloc(1, sizeof(*alt));
if (!alt) {
- WARN("malloc failed");
+ ERROR_GLIBC("calloc");
return -1;
}
@@ -2005,7 +2045,7 @@ next:
}
if (!prev_offset) {
- WARN_INSN(insn, "can't find switch jump table");
+ ERROR_INSN(insn, "can't find switch jump table");
return -1;
}
@@ -2041,7 +2081,7 @@ static void find_jump_table(struct objtool_file *file, struct symbol *func,
insn->jump_dest &&
(insn->jump_dest->offset <= insn->offset ||
insn->jump_dest->offset > orig_insn->offset))
- break;
+ break;
table_reloc = arch_find_switch_table(file, insn, &table_size);
if (!table_reloc)
@@ -2053,8 +2093,10 @@ static void find_jump_table(struct objtool_file *file, struct symbol *func,
if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func)
continue;
+ set_jump_table(table_reloc);
orig_insn->_jump_table = table_reloc;
orig_insn->_jump_table_size = table_size;
+
break;
}
}
@@ -2096,31 +2138,19 @@ static void mark_func_jump_tables(struct objtool_file *file,
static int add_func_jump_tables(struct objtool_file *file,
struct symbol *func)
{
- struct instruction *insn, *insn_t1 = NULL, *insn_t2;
- int ret = 0;
+ struct instruction *insn;
+ int ret;
func_for_each_insn(file, func, insn) {
if (!insn_jump_table(insn))
continue;
- if (!insn_t1) {
- insn_t1 = insn;
- continue;
- }
-
- insn_t2 = insn;
-
- ret = add_jump_table(file, insn_t1, insn_jump_table(insn_t2));
+ ret = add_jump_table(file, insn);
if (ret)
return ret;
-
- insn_t1 = insn_t2;
}
- if (insn_t1)
- ret = add_jump_table(file, insn_t1, NULL);
-
- return ret;
+ return 0;
}
/*
@@ -2173,12 +2203,12 @@ static int read_unwind_hints(struct objtool_file *file)
return 0;
if (!sec->rsec) {
- WARN("missing .rela.discard.unwind_hints section");
+ ERROR("missing .rela.discard.unwind_hints section");
return -1;
}
if (sec->sh.sh_size % sizeof(struct unwind_hint)) {
- WARN("struct unwind_hint size mismatch");
+ ERROR("struct unwind_hint size mismatch");
return -1;
}
@@ -2189,7 +2219,7 @@ static int read_unwind_hints(struct objtool_file *file)
reloc = find_reloc_by_dest(file->elf, sec, i * sizeof(*hint));
if (!reloc) {
- WARN("can't find reloc for unwind_hints[%d]", i);
+ ERROR("can't find reloc for unwind_hints[%d]", i);
return -1;
}
@@ -2198,13 +2228,13 @@ static int read_unwind_hints(struct objtool_file *file)
} else if (reloc->sym->local_label) {
offset = reloc->sym->offset;
} else {
- WARN("unexpected relocation symbol type in %s", sec->rsec->name);
+ ERROR("unexpected relocation symbol type in %s", sec->rsec->name);
return -1;
}
insn = find_insn(file, reloc->sym->sec, offset);
if (!insn) {
- WARN("can't find insn for unwind_hints[%d]", i);
+ ERROR("can't find insn for unwind_hints[%d]", i);
return -1;
}
@@ -2231,7 +2261,8 @@ static int read_unwind_hints(struct objtool_file *file)
if (sym && sym->bind == STB_GLOBAL) {
if (opts.ibt && insn->type != INSN_ENDBR && !insn->noendbr) {
- WARN_INSN(insn, "UNWIND_HINT_IRET_REGS without ENDBR");
+ ERROR_INSN(insn, "UNWIND_HINT_IRET_REGS without ENDBR");
+ return -1;
}
}
}
@@ -2245,7 +2276,7 @@ static int read_unwind_hints(struct objtool_file *file)
cfi = *(insn->cfi);
if (arch_decode_hint_reg(hint->sp_reg, &cfi.cfa.base)) {
- WARN_INSN(insn, "unsupported unwind_hint sp base reg %d", hint->sp_reg);
+ ERROR_INSN(insn, "unsupported unwind_hint sp base reg %d", hint->sp_reg);
return -1;
}
@@ -2291,7 +2322,7 @@ static int read_annotate(struct objtool_file *file,
insn = find_insn(file, reloc->sym->sec, offset);
if (!insn) {
- WARN("bad .discard.annotate_insn entry: %d of type %d", reloc_idx(reloc), type);
+ ERROR("bad .discard.annotate_insn entry: %d of type %d", reloc_idx(reloc), type);
return -1;
}
@@ -2306,6 +2337,8 @@ static int read_annotate(struct objtool_file *file,
static int __annotate_early(struct objtool_file *file, int type, struct instruction *insn)
{
switch (type) {
+
+ /* Must be before add_special_section_alts() */
case ANNOTYPE_IGNORE_ALTS:
insn->ignore_alts = true;
break;
@@ -2332,7 +2365,7 @@ static int __annotate_ifc(struct objtool_file *file, int type, struct instructio
return 0;
if (insn->type != INSN_CALL) {
- WARN_INSN(insn, "intra_function_call not a direct call");
+ ERROR_INSN(insn, "intra_function_call not a direct call");
return -1;
}
@@ -2346,8 +2379,8 @@ static int __annotate_ifc(struct objtool_file *file, int type, struct instructio
dest_off = arch_jump_destination(insn);
insn->jump_dest = find_insn(file, insn->sec, dest_off);
if (!insn->jump_dest) {
- WARN_INSN(insn, "can't find call dest at %s+0x%lx",
- insn->sec->name, dest_off);
+ ERROR_INSN(insn, "can't find call dest at %s+0x%lx",
+ insn->sec->name, dest_off);
return -1;
}
@@ -2366,7 +2399,7 @@ static int __annotate_late(struct objtool_file *file, int type, struct instructi
insn->type != INSN_CALL_DYNAMIC &&
insn->type != INSN_RETURN &&
insn->type != INSN_NOP) {
- WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop");
+ ERROR_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop");
return -1;
}
@@ -2398,8 +2431,8 @@ static int __annotate_late(struct objtool_file *file, int type, struct instructi
break;
default:
- WARN_INSN(insn, "Unknown annotation type: %d", type);
- break;
+ ERROR_INSN(insn, "Unknown annotation type: %d", type);
+ return -1;
}
return 0;
@@ -2512,7 +2545,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
- add_ignores(file);
+ ret = add_ignores(file);
+ if (ret)
+ return ret;
+
add_uaccess_safe(file);
ret = read_annotate(file, __annotate_early);
@@ -2732,7 +2768,7 @@ static int update_cfi_state(struct instruction *insn,
if (cfa->base == CFI_UNDEFINED) {
if (insn_func(insn)) {
WARN_INSN(insn, "undefined stack state");
- return -1;
+ return 1;
}
return 0;
}
@@ -3175,9 +3211,8 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn
if (cficmp(alt_cfi[group_off], insn->cfi)) {
struct alt_group *orig_group = insn->alt_group->orig_group ?: insn->alt_group;
struct instruction *orig = orig_group->first_insn;
- char *where = offstr(insn->sec, insn->offset);
- WARN_INSN(orig, "stack layout conflict in alternatives: %s", where);
- free(where);
+ WARN_INSN(orig, "stack layout conflict in alternatives: %s",
+ offstr(insn->sec, insn->offset));
return -1;
}
}
@@ -3190,13 +3225,15 @@ static int handle_insn_ops(struct instruction *insn,
struct insn_state *state)
{
struct stack_op *op;
+ int ret;
for (op = insn->stack_ops; op; op = op->next) {
- if (update_cfi_state(insn, next_insn, &state->cfi, op))
- return 1;
+ ret = update_cfi_state(insn, next_insn, &state->cfi, op);
+ if (ret)
+ return ret;
- if (!insn->alt_group)
+ if (!opts.uaccess || !insn->alt_group)
continue;
if (op->dest.type == OP_DEST_PUSHF) {
@@ -3238,36 +3275,41 @@ static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2)
WARN_INSN(insn, "stack state mismatch: cfa1=%d%+d cfa2=%d%+d",
cfi1->cfa.base, cfi1->cfa.offset,
cfi2->cfa.base, cfi2->cfa.offset);
+ return false;
+
+ }
- } else if (memcmp(&cfi1->regs, &cfi2->regs, sizeof(cfi1->regs))) {
+ if (memcmp(&cfi1->regs, &cfi2->regs, sizeof(cfi1->regs))) {
for (i = 0; i < CFI_NUM_REGS; i++) {
- if (!memcmp(&cfi1->regs[i], &cfi2->regs[i],
- sizeof(struct cfi_reg)))
+
+ if (!memcmp(&cfi1->regs[i], &cfi2->regs[i], sizeof(struct cfi_reg)))
continue;
WARN_INSN(insn, "stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d",
i, cfi1->regs[i].base, cfi1->regs[i].offset,
i, cfi2->regs[i].base, cfi2->regs[i].offset);
- break;
}
+ return false;
+ }
- } else if (cfi1->type != cfi2->type) {
+ if (cfi1->type != cfi2->type) {
WARN_INSN(insn, "stack state mismatch: type1=%d type2=%d",
cfi1->type, cfi2->type);
+ return false;
+ }
- } else if (cfi1->drap != cfi2->drap ||
+ if (cfi1->drap != cfi2->drap ||
(cfi1->drap && cfi1->drap_reg != cfi2->drap_reg) ||
(cfi1->drap && cfi1->drap_offset != cfi2->drap_offset)) {
WARN_INSN(insn, "stack state mismatch: drap1=%d(%d,%d) drap2=%d(%d,%d)",
cfi1->drap, cfi1->drap_reg, cfi1->drap_offset,
cfi2->drap, cfi2->drap_reg, cfi2->drap_offset);
+ return false;
+ }
- } else
- return true;
-
- return false;
+ return true;
}
static inline bool func_uaccess_safe(struct symbol *func)
@@ -3465,6 +3507,34 @@ next_orig:
return next_insn_same_sec(file, alt_group->orig_group->last_insn);
}
+static bool skip_alt_group(struct instruction *insn)
+{
+ struct instruction *alt_insn = insn->alts ? insn->alts->insn : NULL;
+
+ /* ANNOTATE_IGNORE_ALTERNATIVE */
+ if (insn->alt_group && insn->alt_group->ignore)
+ return true;
+
+ /*
+ * For NOP patched with CLAC/STAC, only follow the latter to avoid
+ * impossible code paths combining patched CLAC with unpatched STAC
+ * or vice versa.
+ *
+ * ANNOTATE_IGNORE_ALTERNATIVE could have been used here, but Linus
+ * requested not to do that to avoid hurting .s file readability
+ * around CLAC/STAC alternative sites.
+ */
+
+ if (!alt_insn)
+ return false;
+
+ /* Don't override ASM_{CLAC,STAC}_UNSAFE */
+ if (alt_insn->alt_group && alt_insn->alt_group->ignore)
+ return false;
+
+ return alt_insn->type == INSN_CLAC || alt_insn->type == INSN_STAC;
+}
+
/*
* Follow the branch starting at the given instruction, and recursively follow
* any other branches (jumps). Meanwhile, track the frame pointer state at
@@ -3480,6 +3550,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
u8 visited;
int ret;
+ if (func && func->ignore)
+ return 0;
+
sec = insn->sec;
while (1) {
@@ -3491,13 +3564,13 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
!strncmp(func->name, "__pfx_", 6))
return 0;
+ if (file->ignore_unreachables)
+ return 0;
+
WARN("%s() falls through to next function %s()",
func->name, insn_func(insn)->name);
- return 1;
- }
+ func->warned = 1;
- if (func && insn->ignore) {
- WARN_INSN(insn, "BUG: why am I validating an ignored function?");
return 1;
}
@@ -3572,24 +3645,19 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
if (propagate_alt_cfi(file, insn))
return 1;
- if (!insn->ignore_alts && insn->alts) {
- bool skip_orig = false;
-
+ if (insn->alts) {
for (alt = insn->alts; alt; alt = alt->next) {
- if (alt->skip_orig)
- skip_orig = true;
-
ret = validate_branch(file, func, alt->insn, state);
if (ret) {
BT_INSN(insn, "(alt)");
return ret;
}
}
-
- if (skip_orig)
- return 0;
}
+ if (skip_alt_group(insn))
+ return 0;
+
if (handle_insn_ops(insn, next_insn, &state))
return 1;
@@ -3610,9 +3678,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
return 1;
}
- if (insn->dead_end)
- return 0;
-
break;
case INSN_JUMP_CONDITIONAL:
@@ -3649,17 +3714,26 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
break;
- case INSN_CONTEXT_SWITCH:
- if (func) {
- if (!next_insn || !next_insn->hint) {
- WARN_INSN(insn, "unsupported instruction in callable function");
- return 1;
- }
- break;
+ case INSN_SYSCALL:
+ if (func && (!next_insn || !next_insn->hint)) {
+ WARN_INSN(insn, "unsupported instruction in callable function");
+ return 1;
+ }
+
+ break;
+
+ case INSN_SYSRET:
+ if (func && (!next_insn || !next_insn->hint)) {
+ WARN_INSN(insn, "unsupported instruction in callable function");
+ return 1;
}
+
return 0;
case INSN_STAC:
+ if (!opts.uaccess)
+ break;
+
if (state.uaccess) {
WARN_INSN(insn, "recursive UACCESS enable");
return 1;
@@ -3669,6 +3743,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
break;
case INSN_CLAC:
+ if (!opts.uaccess)
+ break;
+
if (!state.uaccess && func) {
WARN_INSN(insn, "redundant UACCESS disable");
return 1;
@@ -3710,7 +3787,12 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
if (!next_insn) {
if (state.cfi.cfa.base == CFI_UNDEFINED)
return 0;
- WARN("%s: unexpected end of section", sec->name);
+ if (file->ignore_unreachables)
+ return 0;
+
+ WARN("%s%sunexpected end of section %s",
+ func ? func->name : "", func ? "(): " : "",
+ sec->name);
return 1;
}
@@ -3725,7 +3807,7 @@ static int validate_unwind_hint(struct objtool_file *file,
struct instruction *insn,
struct insn_state *state)
{
- if (insn->hint && !insn->visited && !insn->ignore) {
+ if (insn->hint && !insn->visited) {
int ret = validate_branch(file, insn_func(insn), insn, *state);
if (ret)
BT_INSN(insn, "<=== (hint)");
@@ -3776,23 +3858,15 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn)
insn->visited |= VISITED_UNRET;
- if (!insn->ignore_alts && insn->alts) {
+ if (insn->alts) {
struct alternative *alt;
- bool skip_orig = false;
-
for (alt = insn->alts; alt; alt = alt->next) {
- if (alt->skip_orig)
- skip_orig = true;
-
ret = validate_unret(file, alt->insn);
if (ret) {
BT_INSN(insn, "(alt)");
return ret;
}
}
-
- if (skip_orig)
- return 0;
}
switch (insn->type) {
@@ -3808,7 +3882,7 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn)
if (!is_sibling_call(insn)) {
if (!insn->jump_dest) {
WARN_INSN(insn, "unresolved jump target after linking?!?");
- return -1;
+ return 1;
}
ret = validate_unret(file, insn->jump_dest);
if (ret) {
@@ -3830,7 +3904,7 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn)
if (!dest) {
WARN("Unresolved function after linking!?: %s",
insn_call_dest(insn)->name);
- return -1;
+ return 1;
}
ret = validate_unret(file, dest);
@@ -3848,6 +3922,12 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn)
WARN_INSN(insn, "RET before UNTRAIN");
return 1;
+ case INSN_SYSCALL:
+ break;
+
+ case INSN_SYSRET:
+ return 0;
+
case INSN_NOP:
if (insn->retpoline_safe)
return 0;
@@ -3857,9 +3937,12 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn)
break;
}
+ if (insn->dead_end)
+ return 0;
+
if (!next) {
WARN_INSN(insn, "teh end!");
- return -1;
+ return 1;
}
insn = next;
}
@@ -3874,18 +3957,13 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn)
static int validate_unrets(struct objtool_file *file)
{
struct instruction *insn;
- int ret, warnings = 0;
+ int warnings = 0;
for_each_insn(file, insn) {
if (!insn->unret)
continue;
- ret = validate_unret(file, insn);
- if (ret < 0) {
- WARN_INSN(insn, "Failed UNRET validation");
- return ret;
- }
- warnings += ret;
+ warnings += validate_unret(file, insn);
}
return warnings;
@@ -3911,13 +3989,13 @@ static int validate_retpoline(struct objtool_file *file)
if (insn->type == INSN_RETURN) {
if (opts.rethunk) {
WARN_INSN(insn, "'naked' return found in MITIGATION_RETHUNK build");
- } else
- continue;
- } else {
- WARN_INSN(insn, "indirect %s found in MITIGATION_RETPOLINE build",
- insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
+ warnings++;
+ }
+ continue;
}
+ WARN_INSN(insn, "indirect %s found in MITIGATION_RETPOLINE build",
+ insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
warnings++;
}
@@ -3939,10 +4017,11 @@ static bool is_ubsan_insn(struct instruction *insn)
static bool ignore_unreachable_insn(struct objtool_file *file, struct instruction *insn)
{
- int i;
+ struct symbol *func = insn_func(insn);
struct instruction *prev_insn;
+ int i;
- if (insn->ignore || insn->type == INSN_NOP || insn->type == INSN_TRAP)
+ if (insn->type == INSN_NOP || insn->type == INSN_TRAP || (func && func->ignore))
return true;
/*
@@ -3961,7 +4040,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
* In this case we'll find a piece of code (whole function) that is not
* covered by a !section symbol. Ignore them.
*/
- if (opts.link && !insn_func(insn)) {
+ if (opts.link && !func) {
int size = find_symbol_hole_containing(insn->sec, insn->offset);
unsigned long end = insn->offset + size;
@@ -3987,19 +4066,17 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
*/
if (insn->jump_dest && insn_func(insn->jump_dest) &&
strstr(insn_func(insn->jump_dest)->name, ".cold")) {
- struct instruction *dest = insn->jump_dest;
- func_for_each_insn(file, insn_func(dest), dest)
- dest->ignore = true;
+ insn_func(insn->jump_dest)->ignore = true;
}
}
return false;
}
- if (!insn_func(insn))
+ if (!func)
return false;
- if (insn_func(insn)->static_call_tramp)
+ if (func->static_call_tramp)
return true;
/*
@@ -4011,7 +4088,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
* It may also insert a UD2 after calling a __noreturn function.
*/
prev_insn = prev_insn_same_sec(file, insn);
- if (prev_insn->dead_end &&
+ if (prev_insn && prev_insn->dead_end &&
(insn->type == INSN_BUG ||
(insn->type == INSN_JUMP_UNCONDITIONAL &&
insn->jump_dest && insn->jump_dest->type == INSN_BUG)))
@@ -4030,7 +4107,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
if (insn->type == INSN_JUMP_UNCONDITIONAL) {
if (insn->jump_dest &&
- insn_func(insn->jump_dest) == insn_func(insn)) {
+ insn_func(insn->jump_dest) == func) {
insn = insn->jump_dest;
continue;
}
@@ -4038,7 +4115,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
break;
}
- if (insn->offset + insn->len >= insn_func(insn)->offset + insn_func(insn)->len)
+ if (insn->offset + insn->len >= func->offset + func->len)
break;
insn = next_insn_same_sec(file, insn);
@@ -4130,10 +4207,11 @@ static int validate_symbol(struct objtool_file *file, struct section *sec,
return 0;
insn = find_insn(file, sec, sym->offset);
- if (!insn || insn->ignore || insn->visited)
+ if (!insn || insn->visited)
return 0;
- state->uaccess = sym->uaccess_safe;
+ if (opts.uaccess)
+ state->uaccess = sym->uaccess_safe;
ret = validate_branch(file, insn_func(insn), insn, *state);
if (ret)
@@ -4354,9 +4432,8 @@ static int validate_ibt_data_reloc(struct objtool_file *file,
if (dest->noendbr)
return 0;
- WARN_FUNC("data relocation to !ENDBR: %s",
- reloc->sec->base, reloc_offset(reloc),
- offstr(dest->sec, dest->offset));
+ WARN_FUNC(reloc->sec->base, reloc_offset(reloc),
+ "data relocation to !ENDBR: %s", offstr(dest->sec, dest->offset));
return 1;
}
@@ -4484,13 +4561,15 @@ static int validate_reachable_instructions(struct objtool_file *file)
}
/* 'funcs' is a space-separated list of function names */
-static int disas_funcs(const char *funcs)
+static void disas_funcs(const char *funcs)
{
const char *objdump_str, *cross_compile;
int size, ret;
char *cmd;
cross_compile = getenv("CROSS_COMPILE");
+ if (!cross_compile)
+ cross_compile = "";
objdump_str = "%sobjdump -wdr %s | gawk -M -v _funcs='%s' '"
"BEGIN { split(_funcs, funcs); }"
@@ -4517,7 +4596,7 @@ static int disas_funcs(const char *funcs)
size = snprintf(NULL, 0, objdump_str, cross_compile, objname, funcs) + 1;
if (size <= 0) {
WARN("objdump string size calculation failed");
- return -1;
+ return;
}
cmd = malloc(size);
@@ -4527,24 +4606,30 @@ static int disas_funcs(const char *funcs)
ret = system(cmd);
if (ret) {
WARN("disassembly failed: %d", ret);
- return -1;
+ return;
}
-
- return 0;
}
-static int disas_warned_funcs(struct objtool_file *file)
+static void disas_warned_funcs(struct objtool_file *file)
{
struct symbol *sym;
char *funcs = NULL, *tmp;
for_each_sym(file, sym) {
- if (sym->warnings) {
+ if (sym->warned) {
if (!funcs) {
funcs = malloc(strlen(sym->name) + 1);
+ if (!funcs) {
+ ERROR_GLIBC("malloc");
+ return;
+ }
strcpy(funcs, sym->name);
} else {
tmp = malloc(strlen(funcs) + strlen(sym->name) + 2);
+ if (!tmp) {
+ ERROR_GLIBC("malloc");
+ return;
+ }
sprintf(tmp, "%s %s", funcs, sym->name);
free(funcs);
funcs = tmp;
@@ -4554,8 +4639,6 @@ static int disas_warned_funcs(struct objtool_file *file)
if (funcs)
disas_funcs(funcs);
-
- return 0;
}
struct insn_chunk {
@@ -4588,7 +4671,7 @@ static void free_insns(struct objtool_file *file)
int check(struct objtool_file *file)
{
- int ret, warnings = 0;
+ int ret = 0, warnings = 0;
arch_initial_func_cfi_state(&initial_func_cfi);
init_cfi_state(&init_cfi);
@@ -4606,44 +4689,27 @@ int check(struct objtool_file *file)
cfi_hash_add(&func_cfi);
ret = decode_sections(file);
- if (ret < 0)
+ if (ret)
goto out;
- warnings += ret;
-
if (!nr_insns)
goto out;
- if (opts.retpoline) {
- ret = validate_retpoline(file);
- if (ret < 0)
- goto out;
- warnings += ret;
- }
+ if (opts.retpoline)
+ warnings += validate_retpoline(file);
if (opts.stackval || opts.orc || opts.uaccess) {
- ret = validate_functions(file);
- if (ret < 0)
- goto out;
- warnings += ret;
+ int w = 0;
- ret = validate_unwind_hints(file, NULL);
- if (ret < 0)
- goto out;
- warnings += ret;
+ w += validate_functions(file);
+ w += validate_unwind_hints(file, NULL);
+ if (!w)
+ w += validate_reachable_instructions(file);
- if (!warnings) {
- ret = validate_reachable_instructions(file);
- if (ret < 0)
- goto out;
- warnings += ret;
- }
+ warnings += w;
} else if (opts.noinstr) {
- ret = validate_noinstr_sections(file);
- if (ret < 0)
- goto out;
- warnings += ret;
+ warnings += validate_noinstr_sections(file);
}
if (opts.unret) {
@@ -4651,94 +4717,71 @@ int check(struct objtool_file *file)
* Must be after validate_branch() and friends, it plays
* further games with insn->visited.
*/
- ret = validate_unrets(file);
- if (ret < 0)
- goto out;
- warnings += ret;
+ warnings += validate_unrets(file);
}
- if (opts.ibt) {
- ret = validate_ibt(file);
- if (ret < 0)
- goto out;
- warnings += ret;
- }
+ if (opts.ibt)
+ warnings += validate_ibt(file);
- if (opts.sls) {
- ret = validate_sls(file);
- if (ret < 0)
- goto out;
- warnings += ret;
- }
+ if (opts.sls)
+ warnings += validate_sls(file);
if (opts.static_call) {
ret = create_static_call_sections(file);
- if (ret < 0)
+ if (ret)
goto out;
- warnings += ret;
}
if (opts.retpoline) {
ret = create_retpoline_sites_sections(file);
- if (ret < 0)
+ if (ret)
goto out;
- warnings += ret;
}
if (opts.cfi) {
ret = create_cfi_sections(file);
- if (ret < 0)
+ if (ret)
goto out;
- warnings += ret;
}
if (opts.rethunk) {
ret = create_return_sites_sections(file);
- if (ret < 0)
+ if (ret)
goto out;
- warnings += ret;
if (opts.hack_skylake) {
ret = create_direct_call_sections(file);
- if (ret < 0)
+ if (ret)
goto out;
- warnings += ret;
}
}
if (opts.mcount) {
ret = create_mcount_loc_sections(file);
- if (ret < 0)
+ if (ret)
goto out;
- warnings += ret;
}
if (opts.prefix) {
ret = add_prefix_symbols(file);
- if (ret < 0)
+ if (ret)
goto out;
- warnings += ret;
}
if (opts.ibt) {
ret = create_ibt_endbr_seal_sections(file);
- if (ret < 0)
+ if (ret)
goto out;
- warnings += ret;
}
if (opts.orc && nr_insns) {
ret = orc_create(file);
- if (ret < 0)
+ if (ret)
goto out;
- warnings += ret;
}
free_insns(file);
- if (opts.verbose)
- disas_warned_funcs(file);
-
if (opts.stats) {
printf("nr_insns_visited: %ld\n", nr_insns_visited);
printf("nr_cfi: %ld\n", nr_cfi);
@@ -4747,19 +4790,18 @@ int check(struct objtool_file *file)
}
out:
- /*
- * CONFIG_OBJTOOL_WERROR upgrades all warnings (and errors) to actual
- * errors.
- *
- * Note that even "fatal" type errors don't actually return an error
- * without CONFIG_OBJTOOL_WERROR. That probably needs improved at some
- * point.
- */
- if (opts.werror && (ret || warnings)) {
- if (warnings)
+ if (!ret && !warnings)
+ return 0;
+
+ if (opts.werror && warnings)
+ ret = 1;
+
+ if (opts.verbose) {
+ if (opts.werror && warnings)
WARN("%d warning(s) upgraded to errors", warnings);
- return 1;
+ print_args();
+ disas_warned_funcs(file);
}
- return 0;
+ return ret;
}
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index be4f4b62730c..727a3a4fd9d7 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -72,17 +72,17 @@ static inline void __elf_hash_del(struct elf_hash_node *node,
obj; \
obj = elf_list_entry(obj->member.next, typeof(*(obj)), member))
-#define elf_alloc_hash(name, size) \
-({ \
- __elf_bits(name) = max(10, ilog2(size)); \
+#define elf_alloc_hash(name, size) \
+({ \
+ __elf_bits(name) = max(10, ilog2(size)); \
__elf_table(name) = mmap(NULL, sizeof(struct elf_hash_node *) << __elf_bits(name), \
- PROT_READ|PROT_WRITE, \
- MAP_PRIVATE|MAP_ANON, -1, 0); \
- if (__elf_table(name) == (void *)-1L) { \
- WARN("mmap fail " #name); \
- __elf_table(name) = NULL; \
- } \
- __elf_table(name); \
+ PROT_READ|PROT_WRITE, \
+ MAP_PRIVATE|MAP_ANON, -1, 0); \
+ if (__elf_table(name) == (void *)-1L) { \
+ ERROR_GLIBC("mmap fail " #name); \
+ __elf_table(name) = NULL; \
+ } \
+ __elf_table(name); \
})
static inline unsigned long __sym_start(struct symbol *s)
@@ -316,12 +316,12 @@ static int read_sections(struct elf *elf)
int i;
if (elf_getshdrnum(elf->elf, &sections_nr)) {
- WARN_ELF("elf_getshdrnum");
+ ERROR_ELF("elf_getshdrnum");
return -1;
}
if (elf_getshdrstrndx(elf->elf, &shstrndx)) {
- WARN_ELF("elf_getshdrstrndx");
+ ERROR_ELF("elf_getshdrstrndx");
return -1;
}
@@ -331,7 +331,7 @@ static int read_sections(struct elf *elf)
elf->section_data = calloc(sections_nr, sizeof(*sec));
if (!elf->section_data) {
- perror("calloc");
+ ERROR_GLIBC("calloc");
return -1;
}
for (i = 0; i < sections_nr; i++) {
@@ -341,33 +341,32 @@ static int read_sections(struct elf *elf)
s = elf_getscn(elf->elf, i);
if (!s) {
- WARN_ELF("elf_getscn");
+ ERROR_ELF("elf_getscn");
return -1;
}
sec->idx = elf_ndxscn(s);
if (!gelf_getshdr(s, &sec->sh)) {
- WARN_ELF("gelf_getshdr");
+ ERROR_ELF("gelf_getshdr");
return -1;
}
sec->name = elf_strptr(elf->elf, shstrndx, sec->sh.sh_name);
if (!sec->name) {
- WARN_ELF("elf_strptr");
+ ERROR_ELF("elf_strptr");
return -1;
}
if (sec->sh.sh_size != 0 && !is_dwarf_section(sec)) {
sec->data = elf_getdata(s, NULL);
if (!sec->data) {
- WARN_ELF("elf_getdata");
+ ERROR_ELF("elf_getdata");
return -1;
}
if (sec->data->d_off != 0 ||
sec->data->d_size != sec->sh.sh_size) {
- WARN("unexpected data attributes for %s",
- sec->name);
+ ERROR("unexpected data attributes for %s", sec->name);
return -1;
}
}
@@ -387,7 +386,7 @@ static int read_sections(struct elf *elf)
/* sanity check, one more call to elf_nextscn() should return NULL */
if (elf_nextscn(elf->elf, s)) {
- WARN("section entry mismatch");
+ ERROR("section entry mismatch");
return -1;
}
@@ -467,7 +466,7 @@ static int read_symbols(struct elf *elf)
elf->symbol_data = calloc(symbols_nr, sizeof(*sym));
if (!elf->symbol_data) {
- perror("calloc");
+ ERROR_GLIBC("calloc");
return -1;
}
for (i = 0; i < symbols_nr; i++) {
@@ -477,14 +476,14 @@ static int read_symbols(struct elf *elf)
if (!gelf_getsymshndx(symtab->data, shndx_data, i, &sym->sym,
&shndx)) {
- WARN_ELF("gelf_getsymshndx");
+ ERROR_ELF("gelf_getsymshndx");
goto err;
}
sym->name = elf_strptr(elf->elf, symtab->sh.sh_link,
sym->sym.st_name);
if (!sym->name) {
- WARN_ELF("elf_strptr");
+ ERROR_ELF("elf_strptr");
goto err;
}
@@ -496,8 +495,7 @@ static int read_symbols(struct elf *elf)
sym->sec = find_section_by_index(elf, shndx);
if (!sym->sec) {
- WARN("couldn't find section for symbol %s",
- sym->name);
+ ERROR("couldn't find section for symbol %s", sym->name);
goto err;
}
if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) {
@@ -536,8 +534,7 @@ static int read_symbols(struct elf *elf)
pnamelen = coldstr - sym->name;
pname = strndup(sym->name, pnamelen);
if (!pname) {
- WARN("%s(): failed to allocate memory",
- sym->name);
+ ERROR("%s(): failed to allocate memory", sym->name);
return -1;
}
@@ -545,8 +542,7 @@ static int read_symbols(struct elf *elf)
free(pname);
if (!pfunc) {
- WARN("%s(): can't find parent function",
- sym->name);
+ ERROR("%s(): can't find parent function", sym->name);
return -1;
}
@@ -583,7 +579,7 @@ static int elf_update_sym_relocs(struct elf *elf, struct symbol *sym)
{
struct reloc *reloc;
- for (reloc = sym->relocs; reloc; reloc = reloc->sym_next_reloc)
+ for (reloc = sym->relocs; reloc; reloc = sym_next_reloc(reloc))
set_reloc_sym(elf, reloc, reloc->sym->idx);
return 0;
@@ -613,14 +609,14 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
s = elf_getscn(elf->elf, symtab->idx);
if (!s) {
- WARN_ELF("elf_getscn");
+ ERROR_ELF("elf_getscn");
return -1;
}
if (symtab_shndx) {
t = elf_getscn(elf->elf, symtab_shndx->idx);
if (!t) {
- WARN_ELF("elf_getscn");
+ ERROR_ELF("elf_getscn");
return -1;
}
}
@@ -643,7 +639,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
if (idx) {
/* we don't do holes in symbol tables */
- WARN("index out of range");
+ ERROR("index out of range");
return -1;
}
@@ -654,7 +650,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
buf = calloc(num, entsize);
if (!buf) {
- WARN("malloc");
+ ERROR_GLIBC("calloc");
return -1;
}
@@ -669,7 +665,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
if (t) {
buf = calloc(num, sizeof(Elf32_Word));
if (!buf) {
- WARN("malloc");
+ ERROR_GLIBC("calloc");
return -1;
}
@@ -687,7 +683,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
/* empty blocks should not happen */
if (!symtab_data->d_size) {
- WARN("zero size data");
+ ERROR("zero size data");
return -1;
}
@@ -702,7 +698,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
/* something went side-ways */
if (idx < 0) {
- WARN("negative index");
+ ERROR("negative index");
return -1;
}
@@ -714,13 +710,13 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
} else {
sym->sym.st_shndx = SHN_XINDEX;
if (!shndx_data) {
- WARN("no .symtab_shndx");
+ ERROR("no .symtab_shndx");
return -1;
}
}
if (!gelf_update_symshndx(symtab_data, shndx_data, idx, &sym->sym, shndx)) {
- WARN_ELF("gelf_update_symshndx");
+ ERROR_ELF("gelf_update_symshndx");
return -1;
}
@@ -738,7 +734,7 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym)
if (symtab) {
symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
} else {
- WARN("no .symtab");
+ ERROR("no .symtab");
return NULL;
}
@@ -760,7 +756,7 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym)
old->idx = new_idx;
if (elf_update_symbol(elf, symtab, symtab_shndx, old)) {
- WARN("elf_update_symbol move");
+ ERROR("elf_update_symbol move");
return NULL;
}
@@ -778,7 +774,7 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym)
non_local:
sym->idx = new_idx;
if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) {
- WARN("elf_update_symbol");
+ ERROR("elf_update_symbol");
return NULL;
}
@@ -799,7 +795,7 @@ elf_create_section_symbol(struct elf *elf, struct section *sec)
struct symbol *sym = calloc(1, sizeof(*sym));
if (!sym) {
- perror("malloc");
+ ERROR_GLIBC("malloc");
return NULL;
}
@@ -829,7 +825,7 @@ elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size)
char *name = malloc(namelen);
if (!sym || !name) {
- perror("malloc");
+ ERROR_GLIBC("malloc");
return NULL;
}
@@ -858,16 +854,16 @@ static struct reloc *elf_init_reloc(struct elf *elf, struct section *rsec,
struct reloc *reloc, empty = { 0 };
if (reloc_idx >= sec_num_entries(rsec)) {
- WARN("%s: bad reloc_idx %u for %s with %d relocs",
- __func__, reloc_idx, rsec->name, sec_num_entries(rsec));
+ ERROR("%s: bad reloc_idx %u for %s with %d relocs",
+ __func__, reloc_idx, rsec->name, sec_num_entries(rsec));
return NULL;
}
reloc = &rsec->relocs[reloc_idx];
if (memcmp(reloc, &empty, sizeof(empty))) {
- WARN("%s: %s: reloc %d already initialized!",
- __func__, rsec->name, reloc_idx);
+ ERROR("%s: %s: reloc %d already initialized!",
+ __func__, rsec->name, reloc_idx);
return NULL;
}
@@ -880,7 +876,7 @@ static struct reloc *elf_init_reloc(struct elf *elf, struct section *rsec,
set_reloc_addend(elf, reloc, addend);
elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
- reloc->sym_next_reloc = sym->relocs;
+ set_sym_next_reloc(reloc, sym->relocs);
sym->relocs = reloc;
return reloc;
@@ -896,8 +892,7 @@ struct reloc *elf_init_reloc_text_sym(struct elf *elf, struct section *sec,
int addend = insn_off;
if (!(insn_sec->sh.sh_flags & SHF_EXECINSTR)) {
- WARN("bad call to %s() for data symbol %s",
- __func__, sym->name);
+ ERROR("bad call to %s() for data symbol %s", __func__, sym->name);
return NULL;
}
@@ -926,8 +921,7 @@ struct reloc *elf_init_reloc_data_sym(struct elf *elf, struct section *sec,
s64 addend)
{
if (sym->sec && (sec->sh.sh_flags & SHF_EXECINSTR)) {
- WARN("bad call to %s() for text symbol %s",
- __func__, sym->name);
+ ERROR("bad call to %s() for text symbol %s", __func__, sym->name);
return NULL;
}
@@ -953,8 +947,7 @@ static int read_relocs(struct elf *elf)
rsec->base = find_section_by_index(elf, rsec->sh.sh_info);
if (!rsec->base) {
- WARN("can't find base section for reloc section %s",
- rsec->name);
+ ERROR("can't find base section for reloc section %s", rsec->name);
return -1;
}
@@ -963,7 +956,7 @@ static int read_relocs(struct elf *elf)
nr_reloc = 0;
rsec->relocs = calloc(sec_num_entries(rsec), sizeof(*reloc));
if (!rsec->relocs) {
- perror("calloc");
+ ERROR_GLIBC("calloc");
return -1;
}
for (i = 0; i < sec_num_entries(rsec); i++) {
@@ -973,13 +966,12 @@ static int read_relocs(struct elf *elf)
symndx = reloc_sym(reloc);
reloc->sym = sym = find_symbol_by_index(elf, symndx);
if (!reloc->sym) {
- WARN("can't find reloc entry symbol %d for %s",
- symndx, rsec->name);
+ ERROR("can't find reloc entry symbol %d for %s", symndx, rsec->name);
return -1;
}
elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
- reloc->sym_next_reloc = sym->relocs;
+ set_sym_next_reloc(reloc, sym->relocs);
sym->relocs = reloc;
nr_reloc++;
@@ -1005,7 +997,7 @@ struct elf *elf_open_read(const char *name, int flags)
elf = malloc(sizeof(*elf));
if (!elf) {
- perror("malloc");
+ ERROR_GLIBC("malloc");
return NULL;
}
memset(elf, 0, sizeof(*elf));
@@ -1028,12 +1020,12 @@ struct elf *elf_open_read(const char *name, int flags)
elf->elf = elf_begin(elf->fd, cmd, NULL);
if (!elf->elf) {
- WARN_ELF("elf_begin");
+ ERROR_ELF("elf_begin");
goto err;
}
if (!gelf_getehdr(elf->elf, &elf->ehdr)) {
- WARN_ELF("gelf_getehdr");
+ ERROR_ELF("gelf_getehdr");
goto err;
}
@@ -1062,19 +1054,19 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
if (!strtab)
strtab = find_section_by_name(elf, ".strtab");
if (!strtab) {
- WARN("can't find .strtab section");
+ ERROR("can't find .strtab section");
return -1;
}
s = elf_getscn(elf->elf, strtab->idx);
if (!s) {
- WARN_ELF("elf_getscn");
+ ERROR_ELF("elf_getscn");
return -1;
}
data = elf_newdata(s);
if (!data) {
- WARN_ELF("elf_newdata");
+ ERROR_ELF("elf_newdata");
return -1;
}
@@ -1099,7 +1091,7 @@ struct section *elf_create_section(struct elf *elf, const char *name,
sec = malloc(sizeof(*sec));
if (!sec) {
- perror("malloc");
+ ERROR_GLIBC("malloc");
return NULL;
}
memset(sec, 0, sizeof(*sec));
@@ -1108,13 +1100,13 @@ struct section *elf_create_section(struct elf *elf, const char *name,
s = elf_newscn(elf->elf);
if (!s) {
- WARN_ELF("elf_newscn");
+ ERROR_ELF("elf_newscn");
return NULL;
}
sec->name = strdup(name);
if (!sec->name) {
- perror("strdup");
+ ERROR_GLIBC("strdup");
return NULL;
}
@@ -1122,7 +1114,7 @@ struct section *elf_create_section(struct elf *elf, const char *name,
sec->data = elf_newdata(s);
if (!sec->data) {
- WARN_ELF("elf_newdata");
+ ERROR_ELF("elf_newdata");
return NULL;
}
@@ -1132,14 +1124,14 @@ struct section *elf_create_section(struct elf *elf, const char *name,
if (size) {
sec->data->d_buf = malloc(size);
if (!sec->data->d_buf) {
- perror("malloc");
+ ERROR_GLIBC("malloc");
return NULL;
}
memset(sec->data->d_buf, 0, size);
}
if (!gelf_getshdr(s, &sec->sh)) {
- WARN_ELF("gelf_getshdr");
+ ERROR_ELF("gelf_getshdr");
return NULL;
}
@@ -1154,7 +1146,7 @@ struct section *elf_create_section(struct elf *elf, const char *name,
if (!shstrtab)
shstrtab = find_section_by_name(elf, ".strtab");
if (!shstrtab) {
- WARN("can't find .shstrtab or .strtab section");
+ ERROR("can't find .shstrtab or .strtab section");
return NULL;
}
sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name);
@@ -1179,7 +1171,7 @@ static struct section *elf_create_rela_section(struct elf *elf,
rsec_name = malloc(strlen(sec->name) + strlen(".rela") + 1);
if (!rsec_name) {
- perror("malloc");
+ ERROR_GLIBC("malloc");
return NULL;
}
strcpy(rsec_name, ".rela");
@@ -1199,7 +1191,7 @@ static struct section *elf_create_rela_section(struct elf *elf,
rsec->relocs = calloc(sec_num_entries(rsec), sizeof(struct reloc));
if (!rsec->relocs) {
- perror("calloc");
+ ERROR_GLIBC("calloc");
return NULL;
}
@@ -1232,7 +1224,7 @@ int elf_write_insn(struct elf *elf, struct section *sec,
Elf_Data *data = sec->data;
if (data->d_type != ELF_T_BYTE || data->d_off) {
- WARN("write to unexpected data for section: %s", sec->name);
+ ERROR("write to unexpected data for section: %s", sec->name);
return -1;
}
@@ -1261,7 +1253,7 @@ static int elf_truncate_section(struct elf *elf, struct section *sec)
s = elf_getscn(elf->elf, sec->idx);
if (!s) {
- WARN_ELF("elf_getscn");
+ ERROR_ELF("elf_getscn");
return -1;
}
@@ -1271,7 +1263,7 @@ static int elf_truncate_section(struct elf *elf, struct section *sec)
if (!data) {
if (size) {
- WARN("end of section data but non-zero size left\n");
+ ERROR("end of section data but non-zero size left\n");
return -1;
}
return 0;
@@ -1279,12 +1271,12 @@ static int elf_truncate_section(struct elf *elf, struct section *sec)
if (truncated) {
/* when we remove symbols */
- WARN("truncated; but more data\n");
+ ERROR("truncated; but more data\n");
return -1;
}
if (!data->d_size) {
- WARN("zero size data");
+ ERROR("zero size data");
return -1;
}
@@ -1310,13 +1302,13 @@ int elf_write(struct elf *elf)
if (sec_changed(sec)) {
s = elf_getscn(elf->elf, sec->idx);
if (!s) {
- WARN_ELF("elf_getscn");
+ ERROR_ELF("elf_getscn");
return -1;
}
/* Note this also flags the section dirty */
if (!gelf_update_shdr(s, &sec->sh)) {
- WARN_ELF("gelf_update_shdr");
+ ERROR_ELF("gelf_update_shdr");
return -1;
}
@@ -1329,7 +1321,7 @@ int elf_write(struct elf *elf)
/* Write all changes to the file. */
if (elf_update(elf->elf, ELF_C_WRITE) < 0) {
- WARN_ELF("elf_update");
+ ERROR_ELF("elf_update");
return -1;
}
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 089a1acc48a8..01ef6f415adf 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -19,7 +19,8 @@ enum insn_type {
INSN_CALL,
INSN_CALL_DYNAMIC,
INSN_RETURN,
- INSN_CONTEXT_SWITCH,
+ INSN_SYSCALL,
+ INSN_SYSRET,
INSN_BUG,
INSN_NOP,
INSN_STAC,
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 0fafd0f7a209..6b08666fa69d 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -43,8 +43,10 @@ struct opts {
extern struct opts opts;
-extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
+int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
-extern int objtool_run(int argc, const char **argv);
+int objtool_run(int argc, const char **argv);
+
+void print_args(void);
#endif /* _BUILTIN_H */
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index e1cd13cd28a3..00fb745e7233 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -34,6 +34,8 @@ struct alt_group {
* This is shared with the other alt_groups in the same alternative.
*/
struct cfi_state **cfi;
+
+ bool ignore;
};
#define INSN_CHUNK_BITS 8
@@ -54,7 +56,6 @@ struct instruction {
u32 idx : INSN_CHUNK_BITS,
dead_end : 1,
- ignore : 1,
ignore_alts : 1,
hint : 1,
save : 1,
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 223ac1c24b90..c7c4e87ebe88 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -65,10 +65,11 @@ struct symbol {
u8 return_thunk : 1;
u8 fentry : 1;
u8 profiling_func : 1;
+ u8 warned : 1;
u8 embedded_insn : 1;
u8 local_label : 1;
u8 frame_pointer : 1;
- u8 warnings : 2;
+ u8 ignore : 1;
struct list_head pv_target;
struct reloc *relocs;
};
@@ -77,7 +78,7 @@ struct reloc {
struct elf_hash_node hash;
struct section *sec;
struct symbol *sym;
- struct reloc *sym_next_reloc;
+ unsigned long _sym_next_reloc;
};
struct elf {
@@ -297,6 +298,31 @@ static inline void set_reloc_type(struct elf *elf, struct reloc *reloc, unsigned
mark_sec_changed(elf, reloc->sec, true);
}
+#define RELOC_JUMP_TABLE_BIT 1UL
+
+/* Does reloc mark the beginning of a jump table? */
+static inline bool is_jump_table(struct reloc *reloc)
+{
+ return reloc->_sym_next_reloc & RELOC_JUMP_TABLE_BIT;
+}
+
+static inline void set_jump_table(struct reloc *reloc)
+{
+ reloc->_sym_next_reloc |= RELOC_JUMP_TABLE_BIT;
+}
+
+static inline struct reloc *sym_next_reloc(struct reloc *reloc)
+{
+ return (struct reloc *)(reloc->_sym_next_reloc & ~RELOC_JUMP_TABLE_BIT);
+}
+
+static inline void set_sym_next_reloc(struct reloc *reloc, struct reloc *next)
+{
+ unsigned long bit = reloc->_sym_next_reloc & RELOC_JUMP_TABLE_BIT;
+
+ reloc->_sym_next_reloc = (unsigned long)next | bit;
+}
+
#define for_each_sec(file, sec) \
list_for_each_entry(sec, &file->elf->sections, list)
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index 94a33ee7b363..c0dc86a78ff6 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -41,7 +41,7 @@ struct objtool_file {
struct objtool_file *objtool_open_read(const char *_objname);
-void objtool_pv_add(struct objtool_file *file, int idx, struct symbol *func);
+int objtool_pv_add(struct objtool_file *file, int idx, struct symbol *func);
int check(struct objtool_file *file);
int orc_dump(const char *objname);
diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h
index e049679bb17b..72d09c0adf1a 100644
--- a/tools/objtool/include/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -16,8 +16,6 @@ struct special_alt {
struct list_head list;
bool group;
- bool skip_orig;
- bool skip_alt;
bool jump_or_nop;
u8 key_addend;
@@ -32,7 +30,7 @@ struct special_alt {
int special_get_alts(struct elf *elf, struct list_head *alts);
-void arch_handle_alternative(unsigned short feature, struct special_alt *alt);
+void arch_handle_alternative(struct special_alt *alt);
bool arch_support_alt_relocation(struct special_alt *special_alt,
struct instruction *insn,
diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h
index e72b9d630551..cb8fe846d9dd 100644
--- a/tools/objtool/include/objtool/warn.h
+++ b/tools/objtool/include/objtool/warn.h
@@ -11,6 +11,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
+#include <errno.h>
#include <objtool/builtin.h>
#include <objtool/elf.h>
@@ -41,36 +42,46 @@ static inline char *offstr(struct section *sec, unsigned long offset)
return str;
}
-#define WARN(format, ...) \
- fprintf(stderr, \
- "%s: %s: objtool: " format "\n", \
- objname, \
- opts.werror ? "error" : "warning", \
+#define ___WARN(severity, extra, format, ...) \
+ fprintf(stderr, \
+ "%s%s%s: objtool" extra ": " format "\n", \
+ objname ?: "", \
+ objname ? ": " : "", \
+ severity, \
##__VA_ARGS__)
-#define WARN_FUNC(format, sec, offset, ...) \
-({ \
- char *_str = offstr(sec, offset); \
- WARN("%s: " format, _str, ##__VA_ARGS__); \
- free(_str); \
+#define __WARN(severity, format, ...) \
+ ___WARN(severity, "", format, ##__VA_ARGS__)
+
+#define __WARN_LINE(severity, format, ...) \
+ ___WARN(severity, " [%s:%d]", format, __FILE__, __LINE__, ##__VA_ARGS__)
+
+#define __WARN_ELF(severity, format, ...) \
+ __WARN_LINE(severity, "%s: " format " failed: %s", __func__, ##__VA_ARGS__, elf_errmsg(-1))
+
+#define __WARN_GLIBC(severity, format, ...) \
+ __WARN_LINE(severity, "%s: " format " failed: %s", __func__, ##__VA_ARGS__, strerror(errno))
+
+#define __WARN_FUNC(severity, sec, offset, format, ...) \
+({ \
+ char *_str = offstr(sec, offset); \
+ __WARN(severity, "%s: " format, _str, ##__VA_ARGS__); \
+ free(_str); \
})
-#define WARN_LIMIT 2
+#define WARN_STR (opts.werror ? "error" : "warning")
+
+#define WARN(format, ...) __WARN(WARN_STR, format, ##__VA_ARGS__)
+#define WARN_FUNC(sec, offset, format, ...) __WARN_FUNC(WARN_STR, sec, offset, format, ##__VA_ARGS__)
#define WARN_INSN(insn, format, ...) \
({ \
struct instruction *_insn = (insn); \
- BUILD_BUG_ON(WARN_LIMIT > 2); \
- if (!_insn->sym || _insn->sym->warnings < WARN_LIMIT) { \
- WARN_FUNC(format, _insn->sec, _insn->offset, \
+ if (!_insn->sym || !_insn->sym->warned) \
+ WARN_FUNC(_insn->sec, _insn->offset, format, \
##__VA_ARGS__); \
- if (_insn->sym) \
- _insn->sym->warnings++; \
- } else if (_insn->sym && _insn->sym->warnings == WARN_LIMIT) { \
- WARN_FUNC("skipping duplicate warning(s)", \
- _insn->sec, _insn->offset); \
- _insn->sym->warnings++; \
- } \
+ if (_insn->sym) \
+ _insn->sym->warned = 1; \
})
#define BT_INSN(insn, format, ...) \
@@ -83,7 +94,12 @@ static inline char *offstr(struct section *sec, unsigned long offset)
} \
})
-#define WARN_ELF(format, ...) \
- WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1))
+#define ERROR_STR "error"
+
+#define ERROR(format, ...) __WARN(ERROR_STR, format, ##__VA_ARGS__)
+#define ERROR_ELF(format, ...) __WARN_ELF(ERROR_STR, format, ##__VA_ARGS__)
+#define ERROR_GLIBC(format, ...) __WARN_GLIBC(ERROR_STR, format, ##__VA_ARGS__)
+#define ERROR_FUNC(sec, offset, format, ...) __WARN_FUNC(ERROR_STR, sec, offset, format, ##__VA_ARGS__)
+#define ERROR_INSN(insn, format, ...) WARN_FUNC(insn->sec, insn->offset, format, ##__VA_ARGS__)
#endif /* _WARN_H */
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 1c73fb62fd57..5c8b974ad0f9 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -23,7 +23,7 @@ static struct objtool_file file;
struct objtool_file *objtool_open_read(const char *filename)
{
if (file.elf) {
- WARN("won't handle more than one file at a time");
+ ERROR("won't handle more than one file at a time");
return NULL;
}
@@ -44,14 +44,14 @@ struct objtool_file *objtool_open_read(const char *filename)
return &file;
}
-void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func)
+int objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func)
{
if (!opts.noinstr)
- return;
+ return 0;
if (!f->pv_ops) {
- WARN("paravirt confusion");
- return;
+ ERROR("paravirt confusion");
+ return -1;
}
/*
@@ -60,14 +60,15 @@ void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func)
*/
if (!strcmp(func->name, "_paravirt_nop") ||
!strcmp(func->name, "_paravirt_ident_64"))
- return;
+ return 0;
/* already added this function */
if (!list_empty(&func->pv_target))
- return;
+ return 0;
list_add(&func->pv_target, &f->pv_ops[idx].targets);
f->pv_ops[idx].clean = false;
+ return 0;
}
int main(int argc, const char **argv)
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index 05ef0e297837..1dd9fc18fe62 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -36,47 +36,47 @@ int orc_dump(const char *filename)
elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
if (!elf) {
- WARN_ELF("elf_begin");
+ ERROR_ELF("elf_begin");
return -1;
}
if (!elf64_getehdr(elf)) {
- WARN_ELF("elf64_getehdr");
+ ERROR_ELF("elf64_getehdr");
return -1;
}
memcpy(&dummy_elf.ehdr, elf64_getehdr(elf), sizeof(dummy_elf.ehdr));
if (elf_getshdrnum(elf, &nr_sections)) {
- WARN_ELF("elf_getshdrnum");
+ ERROR_ELF("elf_getshdrnum");
return -1;
}
if (elf_getshdrstrndx(elf, &shstrtab_idx)) {
- WARN_ELF("elf_getshdrstrndx");
+ ERROR_ELF("elf_getshdrstrndx");
return -1;
}
for (i = 0; i < nr_sections; i++) {
scn = elf_getscn(elf, i);
if (!scn) {
- WARN_ELF("elf_getscn");
+ ERROR_ELF("elf_getscn");
return -1;
}
if (!gelf_getshdr(scn, &sh)) {
- WARN_ELF("gelf_getshdr");
+ ERROR_ELF("gelf_getshdr");
return -1;
}
name = elf_strptr(elf, shstrtab_idx, sh.sh_name);
if (!name) {
- WARN_ELF("elf_strptr");
+ ERROR_ELF("elf_strptr");
return -1;
}
data = elf_getdata(scn, NULL);
if (!data) {
- WARN_ELF("elf_getdata");
+ ERROR_ELF("elf_getdata");
return -1;
}
@@ -99,7 +99,7 @@ int orc_dump(const char *filename)
return 0;
if (orc_size % sizeof(*orc) != 0) {
- WARN("bad .orc_unwind section size");
+ ERROR("bad .orc_unwind section size");
return -1;
}
@@ -107,36 +107,36 @@ int orc_dump(const char *filename)
for (i = 0; i < nr_entries; i++) {
if (rela_orc_ip) {
if (!gelf_getrela(rela_orc_ip, i, &rela)) {
- WARN_ELF("gelf_getrela");
+ ERROR_ELF("gelf_getrela");
return -1;
}
if (!gelf_getsym(symtab, GELF_R_SYM(rela.r_info), &sym)) {
- WARN_ELF("gelf_getsym");
+ ERROR_ELF("gelf_getsym");
return -1;
}
if (GELF_ST_TYPE(sym.st_info) == STT_SECTION) {
scn = elf_getscn(elf, sym.st_shndx);
if (!scn) {
- WARN_ELF("elf_getscn");
+ ERROR_ELF("elf_getscn");
return -1;
}
if (!gelf_getshdr(scn, &sh)) {
- WARN_ELF("gelf_getshdr");
+ ERROR_ELF("gelf_getshdr");
return -1;
}
name = elf_strptr(elf, shstrtab_idx, sh.sh_name);
if (!name) {
- WARN_ELF("elf_strptr");
+ ERROR_ELF("elf_strptr");
return -1;
}
} else {
name = elf_strptr(elf, strtab_idx, sym.st_name);
if (!name) {
- WARN_ELF("elf_strptr");
+ ERROR_ELF("elf_strptr");
return -1;
}
}
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 097a69db82a0..c80fed8a840e 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -54,7 +54,7 @@ static const struct special_entry entries[] = {
{},
};
-void __weak arch_handle_alternative(unsigned short feature, struct special_alt *alt)
+void __weak arch_handle_alternative(struct special_alt *alt)
{
}
@@ -86,27 +86,18 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry,
orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig);
if (!orig_reloc) {
- WARN_FUNC("can't find orig reloc", sec, offset + entry->orig);
+ ERROR_FUNC(sec, offset + entry->orig, "can't find orig reloc");
return -1;
}
reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off);
- if (entry->feature) {
- unsigned short feature;
-
- feature = bswap_if_needed(elf,
- *(unsigned short *)(sec->data->d_buf +
- offset +
- entry->feature));
- arch_handle_alternative(feature, alt);
- }
+ arch_handle_alternative(alt);
if (!entry->group || alt->new_len) {
new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new);
if (!new_reloc) {
- WARN_FUNC("can't find new reloc",
- sec, offset + entry->new);
+ ERROR_FUNC(sec, offset + entry->new, "can't find new reloc");
return -1;
}
@@ -122,8 +113,7 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry,
key_reloc = find_reloc_by_dest(elf, sec, offset + entry->key);
if (!key_reloc) {
- WARN_FUNC("can't find key reloc",
- sec, offset + entry->key);
+ ERROR_FUNC(sec, offset + entry->key, "can't find key reloc");
return -1;
}
alt->key_addend = reloc_addend(key_reloc);
@@ -153,8 +143,7 @@ int special_get_alts(struct elf *elf, struct list_head *alts)
continue;
if (sec->sh.sh_size % entry->size != 0) {
- WARN("%s size not a multiple of %d",
- sec->name, entry->size);
+ ERROR("%s size not a multiple of %d", sec->name, entry->size);
return -1;
}
@@ -163,7 +152,7 @@ int special_get_alts(struct elf *elf, struct list_head *alts)
for (idx = 0; idx < nr_entries; idx++) {
alt = malloc(sizeof(*alt));
if (!alt) {
- WARN("malloc failed");
+ ERROR_GLIBC("malloc failed");
return -1;
}
memset(alt, 0, sizeof(*alt));
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index eea95c6c0c71..b7769a22fe1a 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -29,6 +29,7 @@ include $(srctree)/tools/scripts/Makefile.arch
$(call detected_var,SRCARCH)
CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated
+CFLAGS += -I$(OUTPUT)libperf/arch/$(SRCARCH)/include/generated/uapi
# Additional ARCH settings for ppc
ifeq ($(SRCARCH),powerpc)
diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl
index 49eeb2ad8dbd..27c1d5ebcd91 100644
--- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl
@@ -481,3 +481,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index c844cd5cda62..1e8c44c7b614 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -381,3 +381,4 @@
464 n64 getxattrat sys_getxattrat
465 n64 listxattrat sys_listxattrat
466 n64 removexattrat sys_removexattrat
+467 n64 open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index d8b4ab78bef0..9a084bdb8926 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -557,3 +557,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index e9115b4d8b63..a4569b96ef06 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -469,3 +469,4 @@
464 common getxattrat sys_getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl
index c8cad33bf250..52a7652fcff6 100644
--- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl
@@ -470,3 +470,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
index 727f99d333b3..83e45eb6c095 100644
--- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
@@ -512,3 +512,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
index 4d0fb2fba7e2..ac007ea00979 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
@@ -396,7 +396,7 @@
381 i386 pkey_alloc sys_pkey_alloc
382 i386 pkey_free sys_pkey_free
383 i386 statx sys_statx
-384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl
+384 i386 arch_prctl sys_arch_prctl
385 i386 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents
386 i386 rseq sys_rseq
393 i386 semget sys_semget
@@ -472,3 +472,4 @@
464 i386 getxattrat sys_getxattrat
465 i386 listxattrat sys_listxattrat
466 i386 removexattrat sys_removexattrat
+467 i386 open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 5eb708bff1c7..cfb5ca41e30d 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -390,6 +390,7 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
index 37effc1b134e..f657a77314f8 100644
--- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
@@ -437,3 +437,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index a4499e5a6f9c..e9fab20e9330 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -20,6 +20,7 @@ FILES=(
"include/uapi/linux/stat.h"
"include/linux/bits.h"
"include/vdso/bits.h"
+ "include/linux/cfi_types.h"
"include/linux/const.h"
"include/vdso/const.h"
"include/vdso/unaligned.h"
@@ -185,7 +186,7 @@ done
# diff with extra ignore lines
check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include <linux/cfi_types.h>"'
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
-check arch/x86/include/asm/amd-ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"'
+check arch/x86/include/asm/amd/ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"'
check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"'
check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"'
check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh
index 60b3fa254cf6..f0b49f7fb57d 100755
--- a/tools/perf/tests/shell/trace_btf_enum.sh
+++ b/tools/perf/tests/shell/trace_btf_enum.sh
@@ -6,7 +6,7 @@ err=0
set -e
syscall="landlock_add_rule"
-non_syscall="timer:hrtimer_init,timer:hrtimer_start"
+non_syscall="timer:hrtimer_setup,timer:hrtimer_start"
TESTPROG="perf test -w landlock"
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index d18cc47e89bd..c3322eb3d686 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -392,6 +392,8 @@ struct ucred {
extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
+extern int put_cmsg_notrunc(struct msghdr *msg, int level, int type, int len,
+ void *data);
struct timespec64;
struct __kernel_timespec;
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
index 6e6907e63bfc..a15ac2fa4b20 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h
@@ -155,4 +155,8 @@
#define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */
#define AT_HANDLE_CONNECTABLE 0x002 /* Request a connectable file handle */
+/* Flags for execveat2(2). */
+#define AT_EXECVE_CHECK 0x10000 /* Only perform a check if execution
+ would be allowed. */
+
#endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h
index 753971770733..e762e1af650c 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fs.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h
@@ -40,6 +40,15 @@
#define BLOCK_SIZE_BITS 10
#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
+/* flags for integrity meta */
+#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */
+#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */
+#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */
+
+#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \
+ IO_INTEGRITY_CHK_REFTAG | \
+ IO_INTEGRITY_CHK_APPTAG)
+
#define SEEK_SET 0 /* seek relative to beginning of file */
#define SEEK_CUR 1 /* seek relative to current file position */
#define SEEK_END 2 /* seek relative to end of file */
@@ -203,10 +212,8 @@ struct fsxattr {
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
-/*
- * A jump here: 130-136 are reserved for zoned block devices
- * (see uapi/linux/blkzoned.h)
- */
+/* 130-136 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */
+/* 137-141 are used by blk-crypto ioctls (uapi/linux/blk-crypto.h) */
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
#define FIBMAP _IO(0x00,1) /* bmap access */
@@ -332,9 +339,13 @@ typedef int __bitwise __kernel_rwf_t;
/* Atomic Write */
#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040)
+/* buffered IO that drops the cache after reading or writing data */
+#define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080)
+
/* mask of flags supported by the kernel */
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
- RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC)
+ RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\
+ RWF_DONTCACHE)
#define PROCFS_IOCTL_MAGIC 'f'
diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h
index c07008816aca..7fa67c2031a5 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/mount.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h
@@ -179,7 +179,12 @@ struct statmount {
__u32 opt_array; /* [str] Array of nul terminated fs options */
__u32 opt_sec_num; /* Number of security options */
__u32 opt_sec_array; /* [str] Array of nul terminated security options */
- __u64 __spare2[46];
+ __u64 supported_mask; /* Mask flags that this kernel supports */
+ __u32 mnt_uidmap_num; /* Number of uid mappings */
+ __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */
+ __u32 mnt_gidmap_num; /* Number of gid mappings */
+ __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */
+ __u64 __spare2[43];
char str[]; /* Variable size part containing strings */
};
@@ -217,6 +222,9 @@ struct mnt_id_req {
#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */
#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */
#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */
+#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */
+#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */
+#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */
/*
* Special @mnt_id values that can be passed to listmount
diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
index 5c6080680cb2..15c18ef4eb11 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h
@@ -353,4 +353,15 @@ struct prctl_mm_map {
*/
#define PR_LOCK_SHADOW_STACK_STATUS 76
+/*
+ * Controls the mode of timer_create() for CRIU restore operations.
+ * Enabling this allows CRIU to restore timers with explicit IDs.
+ *
+ * Don't use for normal operations as the result might be undefined.
+ */
+#define PR_TIMER_CREATE_RESTORE_IDS 77
+# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0
+# define PR_TIMER_CREATE_RESTORE_IDS_ON 1
+# define PR_TIMER_CREATE_RESTORE_IDS_GET 2
+
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h
index 887a25286441..f78ee3670dd5 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/stat.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h
@@ -98,43 +98,93 @@ struct statx_timestamp {
*/
struct statx {
/* 0x00 */
- __u32 stx_mask; /* What results were written [uncond] */
- __u32 stx_blksize; /* Preferred general I/O size [uncond] */
- __u64 stx_attributes; /* Flags conveying information about the file [uncond] */
+ /* What results were written [uncond] */
+ __u32 stx_mask;
+
+ /* Preferred general I/O size [uncond] */
+ __u32 stx_blksize;
+
+ /* Flags conveying information about the file [uncond] */
+ __u64 stx_attributes;
+
/* 0x10 */
- __u32 stx_nlink; /* Number of hard links */
- __u32 stx_uid; /* User ID of owner */
- __u32 stx_gid; /* Group ID of owner */
- __u16 stx_mode; /* File mode */
+ /* Number of hard links */
+ __u32 stx_nlink;
+
+ /* User ID of owner */
+ __u32 stx_uid;
+
+ /* Group ID of owner */
+ __u32 stx_gid;
+
+ /* File mode */
+ __u16 stx_mode;
__u16 __spare0[1];
+
/* 0x20 */
- __u64 stx_ino; /* Inode number */
- __u64 stx_size; /* File size */
- __u64 stx_blocks; /* Number of 512-byte blocks allocated */
- __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
+ /* Inode number */
+ __u64 stx_ino;
+
+ /* File size */
+ __u64 stx_size;
+
+ /* Number of 512-byte blocks allocated */
+ __u64 stx_blocks;
+
+ /* Mask to show what's supported in stx_attributes */
+ __u64 stx_attributes_mask;
+
/* 0x40 */
- struct statx_timestamp stx_atime; /* Last access time */
- struct statx_timestamp stx_btime; /* File creation time */
- struct statx_timestamp stx_ctime; /* Last attribute change time */
- struct statx_timestamp stx_mtime; /* Last data modification time */
+ /* Last access time */
+ struct statx_timestamp stx_atime;
+
+ /* File creation time */
+ struct statx_timestamp stx_btime;
+
+ /* Last attribute change time */
+ struct statx_timestamp stx_ctime;
+
+ /* Last data modification time */
+ struct statx_timestamp stx_mtime;
+
/* 0x80 */
- __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
+ /* Device ID of special file [if bdev/cdev] */
+ __u32 stx_rdev_major;
__u32 stx_rdev_minor;
- __u32 stx_dev_major; /* ID of device containing file [uncond] */
+
+ /* ID of device containing file [uncond] */
+ __u32 stx_dev_major;
__u32 stx_dev_minor;
+
/* 0x90 */
__u64 stx_mnt_id;
- __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
- __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
+
+ /* Memory buffer alignment for direct I/O */
+ __u32 stx_dio_mem_align;
+
+ /* File offset alignment for direct I/O */
+ __u32 stx_dio_offset_align;
+
/* 0xa0 */
- __u64 stx_subvol; /* Subvolume identifier */
- __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */
- __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */
+ /* Subvolume identifier */
+ __u64 stx_subvol;
+
+ /* Min atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_min;
+
+ /* Max atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max;
+
/* 0xb0 */
- __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */
- __u32 __spare1[1];
+ /* Max atomic write segment count */
+ __u32 stx_atomic_write_segments_max;
+
+ /* File offset alignment for direct I/O reads */
+ __u32 stx_dio_read_offset_align;
+
/* 0xb8 */
__u64 __spare3[9]; /* Spare space for future expansion */
+
/* 0x100 */
};
@@ -164,6 +214,7 @@ struct statx {
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */
+#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h
index 4cd513215bcd..5a049eeaecce 100644
--- a/tools/perf/trace/beauty/include/uapi/sound/asound.h
+++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h
@@ -716,7 +716,7 @@ enum {
* Raw MIDI section - /dev/snd/midi??
*/
-#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 4)
+#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 5)
enum {
SNDRV_RAWMIDI_STREAM_OUTPUT = 0,
@@ -728,6 +728,9 @@ enum {
#define SNDRV_RAWMIDI_INFO_INPUT 0x00000002
#define SNDRV_RAWMIDI_INFO_DUPLEX 0x00000004
#define SNDRV_RAWMIDI_INFO_UMP 0x00000008
+#define SNDRV_RAWMIDI_INFO_STREAM_INACTIVE 0x00000010
+
+#define SNDRV_RAWMIDI_DEVICE_UNKNOWN 0
struct snd_rawmidi_info {
unsigned int device; /* RO/WR (control): device number */
@@ -740,7 +743,8 @@ struct snd_rawmidi_info {
unsigned char subname[32]; /* name of active or selected subdevice */
unsigned int subdevices_count;
unsigned int subdevices_avail;
- unsigned char reserved[64]; /* reserved for future use */
+ int tied_device; /* R: tied rawmidi device (UMP/legacy) */
+ unsigned char reserved[60]; /* reserved for future use */
};
#define SNDRV_RAWMIDI_MODE_FRAMING_MASK (7<<0)
diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c
index 9d0ce88e90e4..456ce64ad822 100644
--- a/tools/perf/util/amd-sample-raw.c
+++ b/tools/perf/util/amd-sample-raw.c
@@ -9,7 +9,7 @@
#include <inttypes.h>
#include <linux/string.h>
-#include "../../arch/x86/include/asm/amd-ibs.h"
+#include "../../arch/x86/include/asm/amd/ibs.h"
#include "debug.h"
#include "session.h"
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1974395492d7..3c030da2e477 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2566,25 +2566,6 @@ check:
return false;
}
-static bool evsel__handle_error_quirks(struct evsel *evsel, int error)
-{
- /*
- * AMD core PMU tries to forward events with precise_ip to IBS PMU
- * implicitly. But IBS PMU has more restrictions so it can fail with
- * supported event attributes. Let's forward it back to the core PMU
- * by clearing precise_ip only if it's from precise_max (:P).
- */
- if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() &&
- evsel->core.attr.precise_ip && evsel->precise_max) {
- evsel->core.attr.precise_ip = 0;
- pr_debug2_peo("removing precise_ip on AMD\n");
- display_attr(&evsel->core.attr);
- return true;
- }
-
- return false;
-}
-
static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads,
int start_cpu_map_idx, int end_cpu_map_idx)
@@ -2730,9 +2711,6 @@ try_fallback:
if (evsel__precise_ip_fallback(evsel))
goto retry_open;
- if (evsel__handle_error_quirks(evsel, err))
- goto retry_open;
-
out_close:
if (err)
threads->err_thread = thread;
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 9fb2c1343c7f..0b037e7389a0 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -371,7 +371,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
* has to be pointed by symsrc_filename
*/
if (ofs == 0) {
- if (dso__data_get_fd(dso, machine, &fd) {
+ if (dso__data_get_fd(dso, machine, &fd)) {
ofs = elf_section_offset(fd, ".debug_frame");
dso__data_put_fd(dso);
}
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 99bf905ade81..b74ed916057e 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -100,7 +100,7 @@ The column name "all" can be used to enable all disabled-by-default built-in cou
.PP
\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names.
.PP
-\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "sysfs", "other".
+\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "other". "idle" (enabled by default), includes "hwidle" and "idle_pct". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "idle_pct". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle".
.PP
\fB--Dump\fP displays the raw counter values.
.PP
@@ -158,16 +158,22 @@ The system configuration dump (if --quiet is not used) is followed by statistics
.PP
\fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs.
.PP
-\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system.
+\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default.
.PP
-\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved.
+\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. These counters are in the "cpuidle" group, which is disabled, by default.
.PP
-\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters.
+\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. These counters are in the "cpuidle" group, which is disabled, by default.
+.PP
+\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. These counters are in the "pct_idle" group, which is enabled by default.
+.PP
+\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters and are in the "hwidle" group, which is enabled, by default.
.PP
\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
.PP
\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
.PP
+\fBCoreThr\fP Core Thermal Throttling events during the measurement interval. Note that events since boot can be find in /sys/devices/system/cpu/cpu*/thermal_throttle/*
+.PP
\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms or /sys/class/drm/card0/gt/gt0/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
.PP
\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
@@ -199,6 +205,8 @@ The system configuration dump (if --quiet is not used) is followed by statistics
\fBUncMHz\fP per-package uncore MHz, instantaneous sample.
.PP
\fBUMHz1.0\fP per-package uncore MHz for domain=1 and fabric_cluster=0, instantaneous sample. System summary is the average of all packages.
+Intel Granite Rapids systems use domains 0-2 for CPUs, and 3-4 for IO, with cluster always 0.
+For the "--show" and "--hide" options, use "UncMHz" to operate on all UMHz*.* as a group.
.SH TOO MUCH INFORMATION EXAMPLE
By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters.
This is ideal for remote debugging, use the "--out" option to save everything to a text file, and get that file to the expert helping you debug.
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 26057af6b5a1..0170d3cc6819 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -153,7 +153,7 @@ struct msr_counter bic[] = {
{ 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 },
- { 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "cpuidle", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 },
@@ -206,6 +206,7 @@ struct msr_counter bic[] = {
{ 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 },
{ 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 },
+ { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 },
};
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -219,7 +220,7 @@ struct msr_counter bic[] = {
#define BIC_TSC_MHz (1ULL << 7)
#define BIC_IRQ (1ULL << 8)
#define BIC_SMI (1ULL << 9)
-#define BIC_sysfs (1ULL << 10)
+#define BIC_cpuidle (1ULL << 10)
#define BIC_CPU_c1 (1ULL << 11)
#define BIC_CPU_c3 (1ULL << 12)
#define BIC_CPU_c6 (1ULL << 13)
@@ -272,17 +273,20 @@ struct msr_counter bic[] = {
#define BIC_Sys_J (1ULL << 60)
#define BIC_NMI (1ULL << 61)
#define BIC_CPU_c1e (1ULL << 62)
-
-#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die)
-#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt)
-#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
-#define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
+#define BIC_pct_idle (1ULL << 63)
+
+#define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die)
+#define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt)
+#define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
+#define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6)
+#define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle )
+#define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle)
#define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
-#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
+#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle)
unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
-unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
+unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC;
#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME)
@@ -1121,7 +1125,7 @@ end:
int backwards_count;
char *progname;
-#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */
+#define CPU_SUBSET_MAXCPUS 8192 /* need to use before probe... */
cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset;
size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size;
#define MAX_ADDED_THREAD_COUNTERS 24
@@ -2211,7 +2215,7 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
return 0;
}
-int probe_msr(int cpu, off_t offset)
+int probe_rapl_msr(int cpu, off_t offset, int index)
{
ssize_t retval;
unsigned long long value;
@@ -2220,13 +2224,22 @@ int probe_msr(int cpu, off_t offset)
retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset);
- /*
- * Expect MSRs to accumulate some non-zero value since the system was powered on.
- * Treat zero as a read failure.
- */
- if (retval != sizeof(value) || value == 0)
+ /* if the read failed, the probe fails */
+ if (retval != sizeof(value))
return 1;
+ /* If an Energy Status Counter MSR returns 0, the probe fails */
+ switch (index) {
+ case RAPL_RCI_INDEX_ENERGY_PKG:
+ case RAPL_RCI_INDEX_ENERGY_CORES:
+ case RAPL_RCI_INDEX_DRAM:
+ case RAPL_RCI_INDEX_GFX:
+ case RAPL_RCI_INDEX_ENERGY_PLATFORM:
+ if (value == 0)
+ return 1;
+ }
+
+ /* PKG,DRAM_PERF_STATUS MSRs, can return any value */
return 0;
}
@@ -2345,16 +2358,25 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
retval |= ~0;
break;
} else if (!strcmp(name_list, "topology")) {
- retval |= BIC_TOPOLOGY;
+ retval |= BIC_GROUP_TOPOLOGY;
break;
} else if (!strcmp(name_list, "power")) {
- retval |= BIC_THERMAL_PWR;
+ retval |= BIC_GROUP_THERMAL_PWR;
break;
} else if (!strcmp(name_list, "idle")) {
- retval |= BIC_IDLE;
+ retval |= BIC_GROUP_IDLE;
+ break;
+ } else if (!strcmp(name_list, "swidle")) {
+ retval |= BIC_GROUP_SW_IDLE;
+ break;
+ } else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */
+ retval |= BIC_GROUP_SW_IDLE;
+ break;
+ } else if (!strcmp(name_list, "hwidle")) {
+ retval |= BIC_GROUP_HW_IDLE;
break;
} else if (!strcmp(name_list, "frequency")) {
- retval |= BIC_FREQUENCY;
+ retval |= BIC_GROUP_FREQUENCY;
break;
} else if (!strcmp(name_list, "other")) {
retval |= BIC_OTHER;
@@ -2363,6 +2385,7 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
}
if (i == MAX_BIC) {
+ fprintf(stderr, "deferred %s\n", name_list);
if (mode == SHOW_LIST) {
deferred_add_names[deferred_add_index++] = name_list;
if (deferred_add_index >= MAX_DEFERRED) {
@@ -3476,7 +3499,7 @@ void delta_core(struct core_data *new, struct core_data *old)
old->c6 = new->c6 - old->c6;
old->c7 = new->c7 - old->c7;
old->core_temp_c = new->core_temp_c;
- old->core_throt_cnt = new->core_throt_cnt;
+ old->core_throt_cnt = new->core_throt_cnt - old->core_throt_cnt;
old->mc6_us = new->mc6_us - old->mc6_us;
DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);
@@ -6030,6 +6053,7 @@ int snapshot_graphics(int idx)
int retval;
rewind(gfx_info[idx].fp);
+ fflush(gfx_info[idx].fp);
switch (idx) {
case GFX_rc6:
@@ -6703,7 +6727,18 @@ static void probe_intel_uncore_frequency_cluster(void)
sprintf(path, "%s/current_freq_khz", path_base);
sprintf(name_buf, "UMHz%d.%d", domain_id, cluster_id);
- add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id);
+ /*
+ * Once add_couter() is called, that counter is always read
+ * and reported -- So it is effectively (enabled & present).
+ * Only call add_counter() here if legacy BIC_UNCORE_MHZ (UncMHz)
+ * is (enabled). Since we are in this routine, we
+ * know we will not probe and set (present) the legacy counter.
+ *
+ * This allows "--show/--hide UncMHz" to be effective for
+ * the clustered MHz counters, as a group.
+ */
+ if BIC_IS_ENABLED(BIC_UNCORE_MHZ)
+ add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id);
if (quiet)
continue;
@@ -7896,7 +7931,7 @@ void rapl_perf_init(void)
rci->flags[cai->rci_index] = cai->flags;
/* Use MSR for this counter */
- } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
+ } else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) {
rci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
rci->msr[cai->rci_index] = cai->msr;
rci->msr_mask[cai->rci_index] = cai->msr_mask;
@@ -8034,7 +8069,7 @@ void msr_perf_init_(void)
cai->present = true;
/* User MSR for this counter */
- } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
+ } else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) {
cci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
cci->msr[cai->rci_index] = cai->msr;
cci->msr_mask[cai->rci_index] = cai->msr_mask;
@@ -8148,7 +8183,7 @@ void cstate_perf_init_(bool soft_c1)
/* User MSR for this counter */
} else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit
- && probe_msr(cpu, cai->msr) == 0) {
+ && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) {
cci->source[cai->rci_index] = COUNTER_SOURCE_MSR;
cci->msr[cai->rci_index] = cai->msr;
}
@@ -9559,7 +9594,7 @@ int get_and_dump_counters(void)
void print_version()
{
- fprintf(outf, "turbostat version 2025.02.02 - Len Brown <lenb@kernel.org>\n");
+ fprintf(outf, "turbostat version 2025.04.06 - Len Brown <lenb@kernel.org>\n");
}
#define COMMAND_LINE_SIZE 2048
@@ -9592,7 +9627,7 @@ struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name)
for (mp = head; mp; mp = mp->next) {
if (debug)
fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name);
- if (!strncmp(name, mp->name, strlen(mp->name)))
+ if (!strcmp(name, mp->name))
return mp;
}
return NULL;
@@ -10239,14 +10274,18 @@ int is_deferred_skip(char *name)
return 0;
}
-void probe_sysfs(void)
+void probe_cpuidle_residency(void)
{
char path[64];
char name_buf[16];
FILE *input;
int state;
+ int min_state = 1024, max_state = 0;
char *sp;
+ if (!DO_BIC(BIC_pct_idle))
+ return;
+
for (state = 10; state >= 0; --state) {
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
@@ -10269,14 +10308,32 @@ void probe_sysfs(void)
sprintf(path, "cpuidle/state%d/time", state);
- if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
+ if (!DO_BIC(BIC_pct_idle) && !is_deferred_add(name_buf))
continue;
if (is_deferred_skip(name_buf))
continue;
add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0);
+
+ if (state > max_state)
+ max_state = state;
+ if (state < min_state)
+ min_state = state;
}
+}
+
+void probe_cpuidle_counts(void)
+{
+ char path[64];
+ char name_buf[16];
+ FILE *input;
+ int state;
+ int min_state = 1024, max_state = 0;
+ char *sp;
+
+ if (!DO_BIC(BIC_cpuidle))
+ return;
for (state = 10; state >= 0; --state) {
@@ -10286,26 +10343,52 @@ void probe_sysfs(void)
continue;
if (!fgets(name_buf, sizeof(name_buf), input))
err(1, "%s: failed to read file", path);
- /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
- sp = strchr(name_buf, '-');
- if (!sp)
- sp = strchrnul(name_buf, '\n');
- *sp = '\0';
fclose(input);
remove_underbar(name_buf);
- sprintf(path, "cpuidle/state%d/usage", state);
-
- if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf))
+ if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf))
continue;
if (is_deferred_skip(name_buf))
continue;
+ /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
+ sp = strchr(name_buf, '-');
+ if (!sp)
+ sp = strchrnul(name_buf, '\n');
+
+ /*
+ * The 'below' sysfs file always contains 0 for the deepest state (largest index),
+ * do not add it.
+ */
+ if (state != max_state) {
+ /*
+ * Add 'C1+' for C1, and so on. The 'below' sysfs file always contains 0 for
+ * the last state, so do not add it.
+ */
+
+ *sp = '+';
+ *(sp + 1) = '\0';
+ sprintf(path, "cpuidle/state%d/below", state);
+ add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
+ }
+
+ *sp = '\0';
+ sprintf(path, "cpuidle/state%d/usage", state);
add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
- }
+ /*
+ * The 'above' sysfs file always contains 0 for the shallowest state (smallest
+ * index), do not add it.
+ */
+ if (state != min_state) {
+ *sp = '-';
+ *(sp + 1) = '\0';
+ sprintf(path, "cpuidle/state%d/above", state);
+ add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU, 0);
+ }
+ }
}
/*
@@ -10549,7 +10632,8 @@ skip_cgroup_setting:
print_bootcmd();
}
- probe_sysfs();
+ probe_cpuidle_residency();
+ probe_cpuidle_counts();
if (!getuid())
set_rlimit();
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index dc4333d23189..8787048c6762 100644
--- a/tools/sched_ext/include/scx/common.bpf.h
+++ b/tools/sched_ext/include/scx/common.bpf.h
@@ -586,36 +586,48 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
}
}
-#define READ_ONCE(x) \
-({ \
- union { typeof(x) __val; char __c[1]; } __u = \
- { .__c = { 0 } }; \
- __read_once_size(&(x), __u.__c, sizeof(x)); \
- __u.__val; \
-})
-
-#define WRITE_ONCE(x, val) \
-({ \
- union { typeof(x) __val; char __c[1]; } __u = \
- { .__val = (val) }; \
- __write_once_size(&(x), __u.__c, sizeof(x)); \
- __u.__val; \
-})
-
-#define READ_ONCE_ARENA(type, x) \
-({ \
- union { type __val; char __c[1]; } __u = \
- { .__c = { 0 } }; \
- __read_once_size((void *)&(x), __u.__c, sizeof(x)); \
- __u.__val; \
+/*
+ * __unqual_typeof(x) - Declare an unqualified scalar type, leaving
+ * non-scalar types unchanged,
+ *
+ * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char'
+ * is not type-compatible with 'signed char', and we define a separate case.
+ *
+ * This is copied verbatim from kernel's include/linux/compiler_types.h, but
+ * with default expression (for pointers) changed from (x) to (typeof(x)0).
+ *
+ * This is because LLVM has a bug where for lvalue (x), it does not get rid of
+ * an extra address_space qualifier, but does in case of rvalue (typeof(x)0).
+ * Hence, for pointers, we need to create an rvalue expression to get the
+ * desired type. See https://github.com/llvm/llvm-project/issues/53400.
+ */
+#define __scalar_type_to_expr_cases(type) \
+ unsigned type : (unsigned type)0, signed type : (signed type)0
+
+#define __unqual_typeof(x) \
+ typeof(_Generic((x), \
+ char: (char)0, \
+ __scalar_type_to_expr_cases(char), \
+ __scalar_type_to_expr_cases(short), \
+ __scalar_type_to_expr_cases(int), \
+ __scalar_type_to_expr_cases(long), \
+ __scalar_type_to_expr_cases(long long), \
+ default: (typeof(x))0))
+
+#define READ_ONCE(x) \
+({ \
+ union { __unqual_typeof(x) __val; char __c[1]; } __u = \
+ { .__c = { 0 } }; \
+ __read_once_size((__unqual_typeof(x) *)&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
})
-#define WRITE_ONCE_ARENA(type, x, val) \
-({ \
- union { type __val; char __c[1]; } __u = \
- { .__val = (val) }; \
- __write_once_size((void *)&(x), __u.__c, sizeof(x)); \
- __u.__val; \
+#define WRITE_ONCE(x, val) \
+({ \
+ union { __unqual_typeof(x) __val; char __c[1]; } __u = \
+ { .__val = (val) }; \
+ __write_once_size((__unqual_typeof(x) *)&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
})
/*
@@ -648,6 +660,23 @@ static inline u32 log2_u64(u64 v)
return log2_u32(v) + 1;
}
+/*
+ * Return a value proportionally scaled to the task's weight.
+ */
+static inline u64 scale_by_task_weight(const struct task_struct *p, u64 value)
+{
+ return (value * p->scx.weight) / 100;
+}
+
+/*
+ * Return a value inversely proportional to the task's weight.
+ */
+static inline u64 scale_by_task_weight_inverse(const struct task_struct *p, u64 value)
+{
+ return value * 100 / p->scx.weight;
+}
+
+
#include "compat.bpf.h"
#include "enums.bpf.h"
diff --git a/tools/sched_ext/include/scx/enum_defs.autogen.h b/tools/sched_ext/include/scx/enum_defs.autogen.h
index 6e6c45f14fe1..c2c33df9292c 100644
--- a/tools/sched_ext/include/scx/enum_defs.autogen.h
+++ b/tools/sched_ext/include/scx/enum_defs.autogen.h
@@ -88,6 +88,8 @@
#define HAVE_SCX_OPS_ENQ_LAST
#define HAVE_SCX_OPS_ENQ_EXITING
#define HAVE_SCX_OPS_SWITCH_PARTIAL
+#define HAVE_SCX_OPS_ENQ_MIGRATION_DISABLED
+#define HAVE_SCX_OPS_ALLOW_QUEUED_WAKEUP
#define HAVE_SCX_OPS_HAS_CGROUP_WEIGHT
#define HAVE_SCX_OPS_ALL_FLAGS
#define HAVE_SCX_OPSS_NONE
@@ -104,6 +106,7 @@
#define HAVE_SCX_RQ_BAL_PENDING
#define HAVE_SCX_RQ_BAL_KEEP
#define HAVE_SCX_RQ_BYPASSING
+#define HAVE_SCX_RQ_CLK_VALID
#define HAVE_SCX_RQ_IN_WAKEUP
#define HAVE_SCX_RQ_IN_BALANCE
#define HAVE_SCX_TASK_NONE
diff --git a/tools/sched_ext/include/scx/enums.autogen.bpf.h b/tools/sched_ext/include/scx/enums.autogen.bpf.h
index 0e941a0d6f88..2f8002bcc19a 100644
--- a/tools/sched_ext/include/scx/enums.autogen.bpf.h
+++ b/tools/sched_ext/include/scx/enums.autogen.bpf.h
@@ -13,6 +13,30 @@ const volatile u64 __SCX_SLICE_DFL __weak;
const volatile u64 __SCX_SLICE_INF __weak;
#define SCX_SLICE_INF __SCX_SLICE_INF
+const volatile u64 __SCX_RQ_ONLINE __weak;
+#define SCX_RQ_ONLINE __SCX_RQ_ONLINE
+
+const volatile u64 __SCX_RQ_CAN_STOP_TICK __weak;
+#define SCX_RQ_CAN_STOP_TICK __SCX_RQ_CAN_STOP_TICK
+
+const volatile u64 __SCX_RQ_BAL_PENDING __weak;
+#define SCX_RQ_BAL_PENDING __SCX_RQ_BAL_PENDING
+
+const volatile u64 __SCX_RQ_BAL_KEEP __weak;
+#define SCX_RQ_BAL_KEEP __SCX_RQ_BAL_KEEP
+
+const volatile u64 __SCX_RQ_BYPASSING __weak;
+#define SCX_RQ_BYPASSING __SCX_RQ_BYPASSING
+
+const volatile u64 __SCX_RQ_CLK_VALID __weak;
+#define SCX_RQ_CLK_VALID __SCX_RQ_CLK_VALID
+
+const volatile u64 __SCX_RQ_IN_WAKEUP __weak;
+#define SCX_RQ_IN_WAKEUP __SCX_RQ_IN_WAKEUP
+
+const volatile u64 __SCX_RQ_IN_BALANCE __weak;
+#define SCX_RQ_IN_BALANCE __SCX_RQ_IN_BALANCE
+
const volatile u64 __SCX_DSQ_FLAG_BUILTIN __weak;
#define SCX_DSQ_FLAG_BUILTIN __SCX_DSQ_FLAG_BUILTIN
diff --git a/tools/sched_ext/include/scx/enums.autogen.h b/tools/sched_ext/include/scx/enums.autogen.h
index 88137a140e72..fedec938584b 100644
--- a/tools/sched_ext/include/scx/enums.autogen.h
+++ b/tools/sched_ext/include/scx/enums.autogen.h
@@ -8,6 +8,14 @@
SCX_ENUM_SET(skel, scx_public_consts, SCX_OPS_NAME_LEN); \
SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_DFL); \
SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_INF); \
+ SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_ONLINE); \
+ SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_CAN_STOP_TICK); \
+ SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_BAL_PENDING); \
+ SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_BAL_KEEP); \
+ SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_BYPASSING); \
+ SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_CLK_VALID); \
+ SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_IN_WAKEUP); \
+ SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_IN_BALANCE); \
SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_BUILTIN); \
SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_LOCAL_ON); \
SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_INVALID); \
diff --git a/tools/sched_ext/include/scx/enums.h b/tools/sched_ext/include/scx/enums.h
index 34cbebe974b7..8e7c91575f0b 100644
--- a/tools/sched_ext/include/scx/enums.h
+++ b/tools/sched_ext/include/scx/enums.h
@@ -14,7 +14,8 @@ static inline void __ENUM_set(u64 *val, char *type, char *name)
bool res;
res = __COMPAT_read_enum(type, name, val);
- SCX_BUG_ON(!res, "enum not found(%s)", name);
+ if (!res)
+ *val = 0;
}
#define SCX_ENUM_SET(skel, type, name) do { \
diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c
index 2c720e3ecad5..fdc7170639e6 100644
--- a/tools/sched_ext/scx_flatcg.bpf.c
+++ b/tools/sched_ext/scx_flatcg.bpf.c
@@ -950,5 +950,5 @@ SCX_OPS_DEFINE(flatcg_ops,
.cgroup_move = (void *)fcg_cgroup_move,
.init = (void *)fcg_init,
.exit = (void *)fcg_exit,
- .flags = SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING,
+ .flags = SCX_OPS_ENQ_EXITING,
.name = "flatcg");
diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl
index ebbdb3c42e9f..580b4e246aec 100644
--- a/tools/scripts/syscall.tbl
+++ b/tools/scripts/syscall.tbl
@@ -407,3 +407,4 @@
464 common getxattrat sys_getxattrat
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 0a6572ab6f37..387f3df8b988 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -61,8 +61,11 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o
cxl_core-y += $(CXL_CORE_SRC)/hdm.o
cxl_core-y += $(CXL_CORE_SRC)/pmu.o
cxl_core-y += $(CXL_CORE_SRC)/cdat.o
+cxl_core-y += $(CXL_CORE_SRC)/ras.o
+cxl_core-y += $(CXL_CORE_SRC)/acpi.o
cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o
cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
+cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o
cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
cxl_core-y += config_check.o
cxl_core-y += cxl_core_test.o
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index cc8948f49117..1c3336095923 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -155,7 +155,7 @@ static struct {
} cfmws7;
struct {
struct acpi_cedt_cfmws cfmws;
- u32 target[4];
+ u32 target[3];
} cfmws8;
struct {
struct acpi_cedt_cxims cxims;
@@ -331,14 +331,14 @@ static struct {
.length = sizeof(mock_cedt.cfmws8),
},
.interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR,
- .interleave_ways = 2,
- .granularity = 0,
+ .interleave_ways = 8,
+ .granularity = 1,
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 |
ACPI_CEDT_CFMWS_RESTRICT_PMEM,
.qtg_id = FAKE_QTG_ID,
- .window_size = SZ_256M * 16UL,
+ .window_size = SZ_512M * 6UL,
},
- .target = { 0, 1, 0, 1, },
+ .target = { 0, 1, 2, },
},
.cxims0 = {
.cxims = {
@@ -1000,25 +1000,21 @@ static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port)
find_cxl_root(port);
struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
struct cxl_dev_state *cxlds = cxlmd->cxlds;
- struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
struct access_coordinate ep_c[ACCESS_COORDINATE_MAX];
- struct range pmem_range = {
- .start = cxlds->pmem_res.start,
- .end = cxlds->pmem_res.end,
- };
- struct range ram_range = {
- .start = cxlds->ram_res.start,
- .end = cxlds->ram_res.end,
- };
if (!cxl_root)
return;
- if (range_len(&ram_range))
- dpa_perf_setup(port, &ram_range, &mds->ram_perf);
+ for (int i = 0; i < cxlds->nr_partitions; i++) {
+ struct resource *res = &cxlds->part[i].res;
+ struct cxl_dpa_perf *perf = &cxlds->part[i].perf;
+ struct range range = {
+ .start = res->start,
+ .end = res->end,
+ };
- if (range_len(&pmem_range))
- dpa_perf_setup(port, &pmem_range, &mds->pmem_perf);
+ dpa_perf_setup(port, &range, perf);
+ }
cxl_memdev_update_perf(cxlmd);
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 9495dbcc03a7..bf9caa908f89 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -78,6 +78,10 @@ static struct cxl_cel_entry mock_cel[] = {
.effect = CXL_CMD_EFFECT_NONE,
},
{
+ .opcode = cpu_to_le16(CXL_MBOX_OP_SET_SHUTDOWN_STATE),
+ .effect = POLICY_CHANGE_IMMEDIATE,
+ },
+ {
.opcode = cpu_to_le16(CXL_MBOX_OP_GET_POISON),
.effect = CXL_CMD_EFFECT_NONE,
},
@@ -178,6 +182,7 @@ struct cxl_mockmem_data {
u64 timestamp;
unsigned long sanitize_timeout;
struct vendor_test_feat test_feat;
+ u8 shutdown_state;
};
static struct mock_event_log *event_find_log(struct device *dev, int log_type)
@@ -1105,6 +1110,21 @@ static int mock_health_info(struct cxl_mbox_cmd *cmd)
return 0;
}
+static int mock_set_shutdown_state(struct cxl_mockmem_data *mdata,
+ struct cxl_mbox_cmd *cmd)
+{
+ struct cxl_mbox_set_shutdown_state_in *ss = cmd->payload_in;
+
+ if (cmd->size_in != sizeof(*ss))
+ return -EINVAL;
+
+ if (cmd->size_out != 0)
+ return -EINVAL;
+
+ mdata->shutdown_state = ss->state;
+ return 0;
+}
+
static struct mock_poison {
struct cxl_dev_state *cxlds;
u64 dpa;
@@ -1583,6 +1603,9 @@ static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox,
case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE:
rc = mock_passphrase_secure_erase(mdata, cmd);
break;
+ case CXL_MBOX_OP_SET_SHUTDOWN_STATE:
+ rc = mock_set_shutdown_state(mdata, cmd);
+ break;
case CXL_MBOX_OP_GET_POISON:
rc = mock_get_poison(cxlds, cmd);
break;
@@ -1670,6 +1693,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
struct cxl_dev_state *cxlds;
struct cxl_mockmem_data *mdata;
struct cxl_mailbox *cxl_mbox;
+ struct cxl_dpa_info range_info = { 0 };
int rc;
mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL);
@@ -1709,7 +1733,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf;
INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work);
- cxlds->serial = pdev->id;
+ cxlds->serial = pdev->id + 1;
if (is_rcd(pdev))
cxlds->rcd = true;
@@ -1730,7 +1754,11 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
- rc = cxl_mem_create_range_info(mds);
+ rc = cxl_mem_dpa_fetch(mds, &range_info);
+ if (rc)
+ return rc;
+
+ rc = cxl_dpa_setup(cxlds, &range_info);
if (rc)
return rc;
@@ -1752,7 +1780,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
- rc = devm_cxl_setup_fwctl(cxlmd);
+ rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd);
if (rc)
dev_dbg(dev, "No CXL FWCTL setup\n");
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index cdd9782f9646..422e186cf3cf 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -20,6 +20,7 @@ CONFIG_VFAT_FS=y
CONFIG_PCI=y
CONFIG_USB4=y
+CONFIG_I2C=y
CONFIG_NET=y
CONFIG_MCTP=y
@@ -43,6 +44,8 @@ CONFIG_REGMAP_BUILD=y
CONFIG_AUDIT=y
+CONFIG_PRIME_NUMBERS=y
+
CONFIG_SECURITY=y
CONFIG_SECURITY_APPARMOR=y
CONFIG_SECURITY_LANDLOCK=y
@@ -51,3 +54,4 @@ CONFIG_SOUND=y
CONFIG_SND=y
CONFIG_SND_SOC=y
CONFIG_SND_SOC_TOPOLOGY_BUILD=y
+CONFIG_SND_SOC_CS35L56_I2C=y
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index da53a709773a..c176487356e6 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -809,6 +809,10 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest:
test.log.extend(parse_diagnostic(lines))
if test.name != "" and not peek_test_name_match(lines, test):
test.add_error(printer, 'missing subtest result line!')
+ elif not lines:
+ print_log(test.log, printer)
+ test.status = TestStatus.NO_TESTS
+ test.add_error(printer, 'No more test results!')
else:
parse_test_result(lines, test, expected_num, printer)
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 5ff4f6ffd873..bbba921e0eac 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -371,8 +371,8 @@ class KUnitParserTest(unittest.TestCase):
"""
result = kunit_parser.parse_run_tests(output.splitlines(), stdout)
# Missing test results after test plan should alert a suspected test crash.
- self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status)
- self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, crashed=1, errors=1))
+ self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status)
+ self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=2))
def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream:
return kunit_parser.LineStream(enumerate(strs, start=1))
diff --git a/tools/testing/kunit/qemu_configs/sh.py b/tools/testing/kunit/qemu_configs/sh.py
index 78a474a5b95f..f00cb89fdef6 100644
--- a/tools/testing/kunit/qemu_configs/sh.py
+++ b/tools/testing/kunit/qemu_configs/sh.py
@@ -7,7 +7,9 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
CONFIG_MEMORY_START=0x0c000000
CONFIG_SH_RTS7751R2D=y
CONFIG_RTS7751R2D_PLUS=y
-CONFIG_SERIAL_SH_SCI=y''',
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_CMDLINE_EXTEND=y
+''',
qemu_arch='sh4',
kernel_path='arch/sh/boot/zImage',
kernel_command_line='console=ttySC1',
diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h
index 1cf82acb2a3e..0ab4b53bb4f3 100644
--- a/tools/testing/memblock/internal.h
+++ b/tools/testing/memblock/internal.h
@@ -24,4 +24,10 @@ static inline void accept_memory(phys_addr_t start, unsigned long size)
{
}
+static inline unsigned long free_reserved_area(void *start, void *end,
+ int poison, const char *s)
+{
+ return 0;
+}
+
#endif
diff --git a/tools/testing/memblock/linux/mutex.h b/tools/testing/memblock/linux/mutex.h
new file mode 100644
index 000000000000..ae3f497165d6
--- /dev/null
+++ b/tools/testing/memblock/linux/mutex.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MUTEX_H
+#define _MUTEX_H
+
+#define DEFINE_MUTEX(name) int name
+
+static inline void dummy_mutex_guard(int *name)
+{
+}
+
+#define guard(mutex) \
+ dummy_##mutex##_guard
+
+#endif /* _MUTEX_H */ \ No newline at end of file
diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index 67503089e6a0..01e836fba488 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -2434,6 +2434,107 @@ static int memblock_overlaps_region_checks(void)
return 0;
}
+#ifdef CONFIG_NUMA
+static int memblock_set_node_check(void)
+{
+ unsigned long i, max_reserved;
+ struct memblock_region *rgn;
+ void *orig_region;
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_allow_resize();
+
+ dummy_physical_memory_init();
+ memblock_add(dummy_physical_memory_base(), MEM_SIZE);
+ orig_region = memblock.reserved.regions;
+
+ /* Equally Split range to node 0 and 1*/
+ memblock_set_node(memblock_start_of_DRAM(),
+ memblock_phys_mem_size() / 2, &memblock.memory, 0);
+ memblock_set_node(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2,
+ memblock_phys_mem_size() / 2, &memblock.memory, 1);
+
+ ASSERT_EQ(memblock.memory.cnt, 2);
+ rgn = &memblock.memory.regions[0];
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM());
+ ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2);
+ ASSERT_EQ(memblock_get_region_node(rgn), 0);
+ rgn = &memblock.memory.regions[1];
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM() + memblock_phys_mem_size() / 2);
+ ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2);
+ ASSERT_EQ(memblock_get_region_node(rgn), 1);
+
+ /* Reserve 126 regions with the last one across node boundary */
+ for (i = 0; i < 125; i++)
+ memblock_reserve(memblock_start_of_DRAM() + SZ_16 * i, SZ_8);
+
+ memblock_reserve(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2 - SZ_8,
+ SZ_16);
+
+ /*
+ * Commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()")
+ * do following process to set nid to each memblock.reserved region.
+ * But it may miss some region if memblock_set_node() double the
+ * array.
+ *
+ * By checking 'max', we make sure all region nid is set properly.
+ */
+repeat:
+ max_reserved = memblock.reserved.max;
+ for_each_mem_region(rgn) {
+ int nid = memblock_get_region_node(rgn);
+
+ memblock_set_node(rgn->base, rgn->size, &memblock.reserved, nid);
+ }
+ if (max_reserved != memblock.reserved.max)
+ goto repeat;
+
+ /* Confirm each region has valid node set */
+ for_each_reserved_mem_region(rgn) {
+ ASSERT_TRUE(numa_valid_node(memblock_get_region_node(rgn)));
+ if (rgn == (memblock.reserved.regions + memblock.reserved.cnt - 1))
+ ASSERT_EQ(1, memblock_get_region_node(rgn));
+ else
+ ASSERT_EQ(0, memblock_get_region_node(rgn));
+ }
+
+ dummy_physical_memory_cleanup();
+
+ /*
+ * The current reserved.regions is occupying a range of memory that
+ * allocated from dummy_physical_memory_init(). After free the memory,
+ * we must not use it. So restore the origin memory region to make sure
+ * the tests can run as normal and not affected by the double array.
+ */
+ memblock.reserved.regions = orig_region;
+ memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS;
+
+ test_pass_pop();
+
+ return 0;
+}
+
+static int memblock_set_node_checks(void)
+{
+ prefix_reset();
+ prefix_push("memblock_set_node");
+ test_print("Running memblock_set_node tests...\n");
+
+ memblock_set_node_check();
+
+ prefix_pop();
+
+ return 0;
+}
+#else
+static int memblock_set_node_checks(void)
+{
+ return 0;
+}
+#endif
+
int memblock_basic_checks(void)
{
memblock_initialization_check();
@@ -2444,6 +2545,7 @@ int memblock_basic_checks(void)
memblock_bottom_up_checks();
memblock_trim_memory_checks();
memblock_overlaps_region_checks();
+ memblock_set_node_checks();
return 0;
}
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 8b3591a51e1f..b2a6660bbd92 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -14,6 +14,7 @@ include ../shared/shared.mk
main: $(OFILES)
+xarray.o: ../../../lib/test_xarray.c
idr-test.o: ../../../lib/test_ida.c
idr-test: idr-test.o $(CORE_OFILES)
diff --git a/tools/testing/rbtree/Makefile b/tools/testing/rbtree/Makefile
new file mode 100644
index 000000000000..d7bbae2af4c7
--- /dev/null
+++ b/tools/testing/rbtree/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+
+.PHONY: clean
+
+TARGETS = rbtree_test interval_tree_test
+OFILES = $(SHARED_OFILES) rbtree-shim.o interval_tree-shim.o maple-shim.o
+DEPS = ../../../include/linux/rbtree.h \
+ ../../../include/linux/rbtree_types.h \
+ ../../../include/linux/rbtree_augmented.h \
+ ../../../include/linux/interval_tree.h \
+ ../../../include/linux/interval_tree_generic.h \
+ ../../../lib/rbtree.c \
+ ../../../lib/interval_tree.c
+
+targets: $(TARGETS)
+
+include ../shared/shared.mk
+
+ifeq ($(DEBUG), 1)
+ CFLAGS += -g
+endif
+
+$(TARGETS): $(OFILES)
+
+rbtree-shim.o: $(DEPS)
+rbtree_test.o: ../../../lib/rbtree_test.c
+interval_tree-shim.o: $(DEPS)
+interval_tree-shim.o: CFLAGS += -DCONFIG_INTERVAL_TREE_SPAN_ITER
+interval_tree_test.o: ../../../lib/interval_tree_test.c
+interval_tree_test.o: CFLAGS += -DCONFIG_INTERVAL_TREE_SPAN_ITER
+
+clean:
+ $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/*
diff --git a/tools/testing/rbtree/interval_tree_test.c b/tools/testing/rbtree/interval_tree_test.c
new file mode 100644
index 000000000000..49bc5b534330
--- /dev/null
+++ b/tools/testing/rbtree/interval_tree_test.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * interval_tree.c: Userspace Interval Tree test-suite
+ * Copyright (c) 2025 Wei Yang <richard.weiyang@gmail.com>
+ */
+#include <linux/math64.h>
+#include <linux/kern_levels.h>
+#include "shared.h"
+#include "maple-shared.h"
+
+#include "../../../lib/interval_tree_test.c"
+
+int usage(void)
+{
+ fprintf(stderr, "Userland interval tree test cases\n");
+ fprintf(stderr, " -n: Number of nodes in the interval tree\n");
+ fprintf(stderr, " -p: Number of iterations modifying the tree\n");
+ fprintf(stderr, " -q: Number of searches to the interval tree\n");
+ fprintf(stderr, " -s: Number of iterations searching the tree\n");
+ fprintf(stderr, " -a: Searches will iterate all nodes in the tree\n");
+ fprintf(stderr, " -m: Largest value for the interval's endpoint\n");
+ fprintf(stderr, " -r: Random seed\n");
+ exit(-1);
+}
+
+void interval_tree_tests(void)
+{
+ interval_tree_test_init();
+ interval_tree_test_exit();
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "n:p:q:s:am:r:")) != -1) {
+ if (opt == 'n')
+ nnodes = strtoul(optarg, NULL, 0);
+ else if (opt == 'p')
+ perf_loops = strtoul(optarg, NULL, 0);
+ else if (opt == 'q')
+ nsearches = strtoul(optarg, NULL, 0);
+ else if (opt == 's')
+ search_loops = strtoul(optarg, NULL, 0);
+ else if (opt == 'a')
+ search_all = true;
+ else if (opt == 'm')
+ max_endpoint = strtoul(optarg, NULL, 0);
+ else if (opt == 'r')
+ seed = strtoul(optarg, NULL, 0);
+ else
+ usage();
+ }
+
+ maple_tree_init();
+ interval_tree_tests();
+ return 0;
+}
diff --git a/tools/testing/rbtree/rbtree_test.c b/tools/testing/rbtree/rbtree_test.c
new file mode 100644
index 000000000000..585c970f679e
--- /dev/null
+++ b/tools/testing/rbtree/rbtree_test.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * rbtree_test.c: Userspace Red Black Tree test-suite
+ * Copyright (c) 2025 Wei Yang <richard.weiyang@gmail.com>
+ */
+#include <linux/init.h>
+#include <linux/math64.h>
+#include <linux/kern_levels.h>
+#include "shared.h"
+
+#include "../../../lib/rbtree_test.c"
+
+int usage(void)
+{
+ fprintf(stderr, "Userland rbtree test cases\n");
+ fprintf(stderr, " -n: Number of nodes in the rb-tree\n");
+ fprintf(stderr, " -p: Number of iterations modifying the rb-tree\n");
+ fprintf(stderr, " -c: Number of iterations modifying and verifying the rb-tree\n");
+ fprintf(stderr, " -r: Random seed\n");
+ exit(-1);
+}
+
+void rbtree_tests(void)
+{
+ rbtree_test_init();
+ rbtree_test_exit();
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "n:p:c:r:")) != -1) {
+ if (opt == 'n')
+ nnodes = strtoul(optarg, NULL, 0);
+ else if (opt == 'p')
+ perf_loops = strtoul(optarg, NULL, 0);
+ else if (opt == 'c')
+ check_loops = strtoul(optarg, NULL, 0);
+ else if (opt == 'r')
+ seed = strtoul(optarg, NULL, 0);
+ else
+ usage();
+ }
+
+ rbtree_tests();
+ return 0;
+}
diff --git a/tools/testing/rbtree/test.h b/tools/testing/rbtree/test.h
new file mode 100644
index 000000000000..f1f1b545b55a
--- /dev/null
+++ b/tools/testing/rbtree/test.h
@@ -0,0 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+void rbtree_tests(void);
+void interval_tree_tests(void);
diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore
index cb24124ac5b9..674aaa02e396 100644
--- a/tools/testing/selftests/.gitignore
+++ b/tools/testing/selftests/.gitignore
@@ -4,7 +4,6 @@ gpiogpio-hammer
gpioinclude/
gpiolsgpio
kselftest_install/
-tpm2/SpaceTest.log
# Python bytecode and cache
__pycache__/
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 2694344274bf..c77c8c8e3d9b 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -62,6 +62,7 @@ TARGETS += mount
TARGETS += mount_setattr
TARGETS += move_mount_set_group
TARGETS += mqueue
+TARGETS += mseal_system_mappings
TARGETS += nci
TARGETS += net
TARGETS += net/af_unix
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
index 5680befae8c6..5e713ef7caa3 100644
--- a/tools/testing/selftests/bpf/config.x86_64
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -39,7 +39,6 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_STAT=y
CONFIG_CPU_IDLE_GOV_LADDER=y
CONFIG_CPUSETS=y
-CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_BLAKE2B=y
CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_XXHASH=y
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
index 09f6487f58b9..5fea3209566e 100644
--- a/tools/testing/selftests/bpf/prog_tests/for_each.c
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -6,6 +6,7 @@
#include "for_each_array_map_elem.skel.h"
#include "for_each_map_elem_write_key.skel.h"
#include "for_each_multi_maps.skel.h"
+#include "for_each_hash_modify.skel.h"
static unsigned int duration;
@@ -203,6 +204,40 @@ out:
for_each_multi_maps__destroy(skel);
}
+static void test_hash_modify(void)
+{
+ struct for_each_hash_modify *skel;
+ int max_entries, i, err;
+ __u64 key, val;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1
+ );
+
+ skel = for_each_hash_modify__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_hash_modify__open_and_load"))
+ return;
+
+ max_entries = bpf_map__max_entries(skel->maps.hashmap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i;
+ err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_OK(topts.retval, "retval");
+
+out:
+ for_each_hash_modify__destroy(skel);
+}
+
void test_for_each(void)
{
if (test__start_subtest("hash_map"))
@@ -213,4 +248,6 @@ void test_for_each(void)
test_write_map_key();
if (test__start_subtest("multi_maps"))
test_multi_maps();
+ if (test__start_subtest("hash_modify"))
+ test_hash_modify();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
index 115287ba441b..0703e987df89 100644
--- a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
@@ -25,8 +25,11 @@ static void *spin_lock_thread(void *arg)
while (!READ_ONCE(skip)) {
err = bpf_prog_test_run_opts(prog_fd, &topts);
- ASSERT_OK(err, "test_run");
- ASSERT_OK(topts.retval, "test_run retval");
+ if (err || topts.retval) {
+ ASSERT_OK(err, "test_run");
+ ASSERT_OK(topts.retval, "test_run retval");
+ break;
+ }
}
pthread_exit(arg);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 2d0796314862..0a99fd404f6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -68,7 +68,6 @@ static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
goto close_cli;
err = disconnect(cli);
- ASSERT_OK(err, "disconnect");
close_cli:
close(cli);
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 13a2e22f5465..863df7c0fdd0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -221,7 +221,7 @@
#define CAN_USE_GOTOL
#endif
-#if _clang_major__ >= 18
+#if __clang_major__ >= 18
#define CAN_USE_BPF_ST
#endif
diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_modify.c b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c
new file mode 100644
index 000000000000..82307166f789
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Intel Corporation */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 128);
+ __type(key, __u64);
+ __type(value, __u64);
+} hashmap SEC(".maps");
+
+static int cb(struct bpf_map *map, __u64 *key, __u64 *val, void *arg)
+{
+ bpf_map_delete_elem(map, key);
+ bpf_map_update_elem(map, key, val, 0);
+ return 0;
+}
+
+SEC("tc")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ (void)skb;
+
+ bpf_for_each_map_elem(&hashmap, cb, NULL, 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 8bd1ebd7d6af..813143b4985d 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -223,7 +223,7 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) {
payload += filepart_length;
}
- cgroup_node = BPF_CORE_READ(cgroup_node, parent);
+ cgroup_node = BPF_CORE_READ(cgroup_node, __parent);
}
return payload;
}
diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock.c b/tools/testing/selftests/bpf/progs/res_spin_lock.c
index b33385dfbd35..22c4fb8b9266 100644
--- a/tools/testing/selftests/bpf/progs/res_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/res_spin_lock.c
@@ -38,13 +38,14 @@ int res_spin_lock_test(struct __sk_buff *ctx)
r = bpf_res_spin_lock(&elem1->lock);
if (r)
return r;
- if (!bpf_res_spin_lock(&elem2->lock)) {
+ r = bpf_res_spin_lock(&elem2->lock);
+ if (!r) {
bpf_res_spin_unlock(&elem2->lock);
bpf_res_spin_unlock(&elem1->lock);
return -1;
}
bpf_res_spin_unlock(&elem1->lock);
- return 0;
+ return r != -EDEADLK;
}
SEC("tc")
@@ -124,12 +125,15 @@ int res_spin_lock_test_held_lock_max(struct __sk_buff *ctx)
/* Trigger AA, after exhausting entries in the held lock table. This
* time, only the timeout can save us, as AA detection won't succeed.
*/
- if (!bpf_res_spin_lock(locks[34])) {
+ ret = bpf_res_spin_lock(locks[34]);
+ if (!ret) {
bpf_res_spin_unlock(locks[34]);
ret = 1;
goto end;
}
+ ret = ret != -ETIMEDOUT ? 2 : 0;
+
end:
for (i = i - 1; i >= 0; i--)
bpf_res_spin_unlock(locks[i]);
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index fb07f5773888..7f3c233943b3 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -117,7 +117,7 @@ int BPF_PROG(handle_fexit_ret, int arg, struct file *ret)
bpf_probe_read_kernel(&buf, 8, ret);
bpf_probe_read_kernel(&buf, 8, (char *)ret + 256);
- *(volatile long long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
index e2a21fbd4e44..dcac69f5928a 100644
--- a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
+++ b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
@@ -21,7 +21,7 @@ static __u64 test_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
SEC("fexit/bpf_testmod_return_ptr")
int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret)
{
- *(volatile long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
triggered++;
@@ -31,7 +31,7 @@ int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret)
SEC("fexit/bpf_testmod_return_ptr")
int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret)
{
- *(volatile long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
triggered++;
@@ -41,7 +41,7 @@ int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret)
SEC("fexit/bpf_testmod_return_ptr")
int BPF_PROG(handle_fexit_ret_subprogs3, int arg, struct file *ret)
{
- *(volatile long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
triggered++;
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
index a9be6ae49454..c258b0722e04 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
@@ -12,7 +12,7 @@ SEC("raw_tp")
__arch_x86_64
__log_level(4) __msg("stack depth 8")
__xlated("4: r5 = 5")
-__xlated("5: w0 = ")
+__xlated("5: r0 = ")
__xlated("6: r0 = &(void __percpu *)(r0)")
__xlated("7: r0 = *(u32 *)(r0 +0)")
__xlated("8: exit")
@@ -704,7 +704,7 @@ SEC("raw_tp")
__arch_x86_64
__log_level(4) __msg("stack depth 32+0")
__xlated("2: r1 = 1")
-__xlated("3: w0 =")
+__xlated("3: r0 =")
__xlated("4: r0 = &(void __percpu *)(r0)")
__xlated("5: r0 = *(u32 *)(r0 +0)")
/* bpf_loop params setup */
@@ -753,7 +753,7 @@ __arch_x86_64
__log_level(4) __msg("stack depth 40+0")
/* call bpf_get_smp_processor_id */
__xlated("2: r1 = 42")
-__xlated("3: w0 =")
+__xlated("3: r0 =")
__xlated("4: r0 = &(void __percpu *)(r0)")
__xlated("5: r0 = *(u32 *)(r0 +0)")
/* call bpf_get_prandom_u32 */
diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
index b1fbdf119553..fc91b414364e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_private_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
@@ -27,7 +27,7 @@ __description("Private stack, single prog")
__success
__arch_x86_64
__jited(" movabsq $0x{{.*}}, %r9")
-__jited(" addq %gs:0x{{.*}}, %r9")
+__jited(" addq %gs:{{.*}}, %r9")
__jited(" movl $0x2a, %edi")
__jited(" movq %rdi, -0x100(%r9)")
__naked void private_stack_single_prog(void)
@@ -74,7 +74,7 @@ __success
__arch_x86_64
/* private stack fp for the main prog */
__jited(" movabsq $0x{{.*}}, %r9")
-__jited(" addq %gs:0x{{.*}}, %r9")
+__jited(" addq %gs:{{.*}}, %r9")
__jited(" movl $0x2a, %edi")
__jited(" movq %rdi, -0x200(%r9)")
__jited(" pushq %r9")
@@ -122,7 +122,7 @@ __jited(" pushq %rbp")
__jited(" movq %rsp, %rbp")
__jited(" endbr64")
__jited(" movabsq $0x{{.*}}, %r9")
-__jited(" addq %gs:0x{{.*}}, %r9")
+__jited(" addq %gs:{{.*}}, %r9")
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index 400a696a0d21..a17256d9f88a 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -88,22 +88,32 @@ echo "" > test/cpuset.cpus
# If isolated CPUs have been reserved at boot time (as shown in
# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8
# that will be used by this script for testing purpose. If not, some of
-# the tests may fail incorrectly. These pre-isolated CPUs should stay in
-# an isolated state throughout the testing process for now.
+# the tests may fail incorrectly. Wait a bit and retry again just in case
+# these isolated CPUs are leftover from previous run and have just been
+# cleaned up earlier in this script.
+#
+# These pre-isolated CPUs should stay in an isolated state throughout the
+# testing process for now.
#
BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated)
+[[ -n "$BOOT_ISOLCPUS" ]] && {
+ sleep 0.5
+ BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated)
+}
if [[ -n "$BOOT_ISOLCPUS" ]]
then
[[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] &&
skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested"
echo "Pre-isolated CPUs: $BOOT_ISOLCPUS"
fi
+
cleanup()
{
online_cpus
cd $CGROUP2
- rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
- rmdir test > /dev/null 2>&1
+ rmdir A1/A2/A3 A1/A2 A1 B1 test/A1 test/B1 test > /dev/null 2>&1
+ rmdir rtest/p1/c11 rtest/p1/c12 rtest/p2/c21 \
+ rtest/p2/c22 rtest/p1 rtest/p2 rtest > /dev/null 2>&1
[[ -n "$SCHED_DEBUG" ]] &&
echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose
}
@@ -173,14 +183,22 @@ test_add_proc()
#
# Cgroup test hierarchy
#
-# root -- A1 -- A2 -- A3
-# +- B1
+# root
+# |
+# +------+------+
+# | |
+# A1 B1
+# |
+# A2
+# |
+# A3
#
# P<v> = set cpus.partition (0:member, 1:root, 2:isolated)
# C<l> = add cpu-list to cpuset.cpus
# X<l> = add cpu-list to cpuset.cpus.exclusive
# S<p> = use prefix in subtree_control
# T = put a task into cgroup
+# CX<l> = add cpu-list to both cpuset.cpus and cpuset.cpus.exclusive
# O<c>=<v> = Write <v> to CPU online file of <c>
#
# ECPUs - effective CPUs of cpusets
@@ -207,130 +225,129 @@ TEST_MATRIX=(
" C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5"
" C0-1 . . C2-3:P1 . . . C2 0 "
" C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5"
- "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3"
- "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3"
- "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1"
- "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0"
- "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2"
- "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1"
- "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3"
+ "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0"
+ "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2"
+ "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1"
+ "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
# CPU offlining cases:
- " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1,B1:3"
- "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1,A2:3"
- "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1,A2:2-3"
- "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0,A2:2-3"
- "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1,A2:2-3"
- "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
- "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
- "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
- "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
- "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:,A2:3 A1:P1,A2:P1"
- "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2,A2: A1:P1,A2:P1"
- "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1"
- "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2,A2:2 A1:P1,A2:P-1"
- "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
- "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1"
- "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1"
- "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
- "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
+ " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1|B1:3"
+ "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3"
+ "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3"
+ "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2"
+ "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P2"
+ "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1"
+ "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1"
+ "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1"
+ "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1|A2:2|A3: A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1|A2:3|A3:3 A1:P1|A2:P1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1|A2:2|A3:2 A1:P1|A2:P1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1"
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
# ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
#
# Remote partition and cpuset.cpus.exclusive tests
#
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3|A2:1-3|A3:2-3|B1:2-3 A1:P0|A3:P0|B1:P-2"
" C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4"
- " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1,A3:2-3 A2:P2,A3:P2 1-3"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5"
- " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4,A2:1-3,A3:1-3 A2:P2 1-3"
- " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4,A2:4,A3:2-3 A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5"
+ " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3"
+ " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3"
# Nested remote/local partition tests
- " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \
- A1:P0,A2:P1,A3:P2,B1:P1 2-3"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \
- A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3"
- " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1,A2:2-3,A3:2-3,B1:4 \
- A1:P0,A2:P1,A3:P0,B1:P1"
- " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \
- A1:P0,A2:P1,A3:P2,B1:P1 2-4,3"
- " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \
- A1:P0,A2:P2,A3:P1 2-4,2-3"
- " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1,A2:2,A3:3-4 \
- A1:P0,A2:P2,A3:P1 2"
+ " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \
+ A1:P0|A2:P1|A3:P2|B1:P1 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \
+ A1:P0|A2:P1|A3:P2|B1:P1 2-4|2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \
+ A1:P0|A2:P1|A3:P0|B1:P1"
+ " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \
+ A1:P0|A2:P1|A3:P2|B1:P1 2-4|3"
+ " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1|A2:2-3|A3:4 \
+ A1:P0|A2:P2|A3:P1 2-4|2-3"
+ " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \
+ A1:P0|A2:P2|A3:P1 2"
" C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
- . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \
- A1:P0,A2:P-2,A3:P-1"
+ . . X5 . . 0 A1:0-4|A2:1-4|A3:2-4 \
+ A1:P0|A2:P-2|A3:P-1 ."
" C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
- . . . X1 . 0 A1:0-1,A2:2-4,A3:2-4 \
- A1:P0,A2:P2,A3:P-1 2-4"
+ . . . X1 . 0 A1:0-1|A2:2-4|A3:2-4 \
+ A1:P0|A2:P2|A3:P-1 2-4"
# Remote partition offline tests
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3"
- " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3,"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3"
+ " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|"
# An invalidated remote partition cannot self-recover from hotplug
- " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2"
+ " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ."
# cpus.exclusive.effective clearing test
- " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3,A2:1-3,A3:2,XA1:"
+ " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:"
# Invalid to valid remote partition transition test
- " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3,A2:1-3,XA2: A2:P-2"
+ " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2 ."
" C0-3:S+ C1-3:X3:P2
- . . X2-3 P2 . . 0 A1:0-2,A2:3,XA2:3 A2:P2 3"
+ . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3"
# Invalid to valid local partition direct transition tests
- " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:1-3:XA2: A1:P2,A2:P-2 1-3"
- " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3"
- " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4,B1:4-6 A1:P-2,B1:P0"
- " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3"
- " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3"
+ " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3"
+ " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3"
+ " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:4-6 A1:P-2|B1:P0"
+ " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3"
# Local partition invalidation tests
" C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
- . . . . . 0 A1:1,A2:2,A3:3 A1:P2,A2:P2,A3:P2 1-3"
+ . . . . . 0 A1:1|A2:2|A3:3 A1:P2|A2:P2|A3:P2 1-3"
" C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
- . . X4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3"
+ . . X4 . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3"
" C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
- . . C4:X . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3"
+ . . C4:X . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3"
# Local partition CPU change tests
- " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2"
- " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3"
+ " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2"
+ " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3"
# cpus_allowed/exclusive_cpus update tests
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
- . X:C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \
- A1:P0,A3:P-2"
+ . X:C4 . P2 . 0 A1:4|A2:4|XA2:|XA3:|A3:4 \
+ A1:P0|A3:P-2 ."
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
- . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \
- A1:P0,A3:P-2"
+ . X1 . P2 . 0 A1:0-3|A2:1-3|XA1:1|XA2:|XA3:|A3:2-3 \
+ A1:P0|A3:P-2 ."
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
- . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \
- A1:P0,A3:P2 3"
+ . . X3 P2 . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3 \
+ A1:P0|A3:P2 3"
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
- . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \
- A1:P0,A3:P-2"
+ . . X3 . . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3|XA3:3 \
+ A1:P0|A3:P2 3"
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
- . X4 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:4,XA2:,XA3 \
- A1:P0,A3:P-2"
+ . X4 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:4|XA2:|XA3 \
+ A1:P0|A3:P-2"
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
# ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
@@ -339,68 +356,127 @@ TEST_MATRIX=(
#
# Adding CPUs to partition root that are not in parent's
# cpuset.cpus is allowed, but those extra CPUs are ignored.
- "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1"
# Taking away all CPUs from parent or itself if there are tasks
# will make the partition invalid.
- "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1"
- " C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1"
- "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
- "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1"
+ " C3:P1:S+ C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1"
+ "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1"
+ "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1"
# Changing a partition root to member makes child partitions invalid
- "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1"
- "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1"
+ "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1"
+ "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P0|A3:P-1"
# cpuset.cpus can contains cpus not in parent's cpuset.cpus as long
# as they overlap.
- "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1"
# Deletion of CPUs distributed to child cgroup is allowed.
- "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5"
+ "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5"
# To become a valid partition root, cpuset.cpus must overlap parent's
# cpuset.cpus.
- " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1"
+ " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1"
# Enabling partition with child cpusets is allowed
- " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1"
+ " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1"
- # A partition root with non-partition root parent is invalid, but it
+ # A partition root with non-partition root parent is invalid| but it
# can be made valid if its parent becomes a partition root too.
- " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2"
- " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2"
+ " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2"
+ " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1"
# A non-exclusive cpuset.cpus change will invalidate partition and its siblings
- " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0"
- " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1"
- " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1"
+ " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P0"
+ " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P-1"
+ " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P0|B1:P-1"
# cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it
- " C0-3 . . C4-5 X5 . . . 0 A1:0-3,B1:4-5"
+ " C0-3 . . C4-5 X5 . . . 0 A1:0-3|B1:4-5"
# Child partition root that try to take all CPUs from parent partition
# with tasks will remain invalid.
- " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1"
- " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1,A2:1-4 A1:P1,A2:P1"
- " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1"
+ " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1"
+ " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1"
+ " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1"
# Clearing of cpuset.cpus with a preset cpuset.cpus.exclusive shouldn't
# affect cpuset.cpus.exclusive.effective.
- " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4,XA2:3"
+ " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4|XA2:3"
+
+ # cpuset.cpus can contain CPUs that overlap a sibling cpuset with cpus.exclusive
+ # but creating a local partition out of it is not allowed. Similarly and change
+ # in cpuset.cpus of a local partition that overlaps sibling exclusive CPUs will
+ # invalidate it.
+ " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \
+ A1:P0|A2:P2:B1:P1 2-4"
+ " CX1-4:S+ CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \
+ A1:P0|A2:P2:B1:P-1 2-4"
+ " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \
+ A1:P0|A2:P2:B1:P-1 2-4"
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
# ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
# Failure cases:
# A task cannot be added to a partition with no cpu
- "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1"
# Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected
- " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3,B1:4-5"
+ " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3|B1:4-5"
# cpuset.cpus cannot be a subset of sibling cpuset.cpus.exclusive
- " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3,B1:4-5"
+ " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3|B1:4-5"
+)
+
+#
+# Cpuset controller remote partition test matrix.
+#
+# Cgroup test hierarchy
+#
+# root
+# |
+# rtest (cpuset.cpus.exclusive=1-7)
+# |
+# +------+------+
+# | |
+# p1 p2
+# +--+--+ +--+--+
+# | | | |
+# c11 c12 c21 c22
+#
+# REMOTE_TEST_MATRIX uses the same notational convention as TEST_MATRIX.
+# Only CPUs 1-7 should be used.
+#
+REMOTE_TEST_MATRIX=(
+ # old-p1 old-p2 old-c11 old-c12 old-c21 old-c22
+ # new-p1 new-p2 new-c11 new-c12 new-c21 new-c22 ECPUs Pstate ISOLCPUS
+ # ------ ------ ------- ------- ------- ------- ----- ------ --------
+ " X1-3:S+ X4-6:S+ X1-2 X3 X4-5 X6 \
+ . . P2 P2 P2 P2 c11:1-2|c12:3|c21:4-5|c22:6 \
+ c11:P2|c12:P2|c21:P2|c22:P2 1-6"
+ " CX1-4:S+ . X1-2:P2 C3 . . \
+ . . . C3-4 . . p1:3-4|c11:1-2|c12:3-4 \
+ p1:P0|c11:P2|c12:P0 1-2"
+ " CX1-4:S+ . X1-2:P2 . . . \
+ X2-4 . . . . . p1:1,3-4|c11:2 \
+ p1:P0|c11:P2 2"
+ " CX1-5:S+ . X1-2:P2 X3-5:P1 . . \
+ X2-4 . . . . . p1:1,5|c11:2|c12:3-4 \
+ p1:P0|c11:P2|c12:P1 2"
+ " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \
+ . . X2 . . . p1:1|c11:2|c12:3-4 \
+ p1:P0|c11:P2|c12:P1 2"
+ # p1 as member, will get its effective CPUs from its parent rtest
+ " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \
+ . . X1 CX2-4 . . p1:5-7|c11:1|c12:2-4 \
+ p1:P0|c11:P2|c12:P1 1"
+ " CX1-4:S+ X5-6:P1:S+ . . . . \
+ . . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \
+ p1:P0|p2:P1|c11:P2|c12:P1|c22:P2 \
+ 1-2,4-6|1-2,5-6"
)
#
@@ -453,25 +529,26 @@ set_ctrl_state()
PFILE=$CGRP/cpuset.cpus.partition
CFILE=$CGRP/cpuset.cpus
XFILE=$CGRP/cpuset.cpus.exclusive
- S=$(expr substr $CMD 1 1)
- if [[ $S = S ]]
- then
- PREFIX=${CMD#?}
+ case $CMD in
+ S*) PREFIX=${CMD#?}
COMM="echo ${PREFIX}${CTRL} > $SFILE"
eval $COMM $REDIRECT
- elif [[ $S = X ]]
- then
+ ;;
+ X*)
CPUS=${CMD#?}
COMM="echo $CPUS > $XFILE"
eval $COMM $REDIRECT
- elif [[ $S = C ]]
- then
- CPUS=${CMD#?}
+ ;;
+ CX*)
+ CPUS=${CMD#??}
+ COMM="echo $CPUS > $CFILE; echo $CPUS > $XFILE"
+ eval $COMM $REDIRECT
+ ;;
+ C*) CPUS=${CMD#?}
COMM="echo $CPUS > $CFILE"
eval $COMM $REDIRECT
- elif [[ $S = P ]]
- then
- VAL=${CMD#?}
+ ;;
+ P*) VAL=${CMD#?}
case $VAL in
0) VAL=member
;;
@@ -486,15 +563,17 @@ set_ctrl_state()
esac
COMM="echo $VAL > $PFILE"
eval $COMM $REDIRECT
- elif [[ $S = O ]]
- then
- VAL=${CMD#?}
+ ;;
+ O*) VAL=${CMD#?}
write_cpu_online $VAL
- elif [[ $S = T ]]
- then
- COMM="echo 0 > $TFILE"
+ ;;
+ T*) COMM="echo 0 > $TFILE"
eval $COMM $REDIRECT
- fi
+ ;;
+ *) echo "Unknown command: $CMD"
+ exit 1
+ ;;
+ esac
RET=$?
[[ $RET -ne 0 ]] && {
[[ -n "$SHOWERR" ]] && {
@@ -532,21 +611,18 @@ online_cpus()
}
#
-# Return 1 if the list of effective cpus isn't the same as the initial list.
+# Remove all the test cgroup directories
#
reset_cgroup_states()
{
echo 0 > $CGROUP2/cgroup.procs
online_cpus
- rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
- pause 0.02
- set_ctrl_state . R-
- pause 0.01
+ rmdir $RESET_LIST > /dev/null 2>&1
}
dump_states()
{
- for DIR in . A1 A1/A2 A1/A2/A3 B1
+ for DIR in $CGROUP_LIST
do
CPUS=$DIR/cpuset.cpus
ECPUS=$DIR/cpuset.cpus.effective
@@ -566,17 +642,33 @@ dump_states()
}
#
+# Set the actual cgroup directory into $CGRP_DIR
+# $1 - cgroup name
+#
+set_cgroup_dir()
+{
+ CGRP_DIR=$1
+ [[ $CGRP_DIR = A2 ]] && CGRP_DIR=A1/A2
+ [[ $CGRP_DIR = A3 ]] && CGRP_DIR=A1/A2/A3
+ [[ $CGRP_DIR = c11 ]] && CGRP_DIR=p1/c11
+ [[ $CGRP_DIR = c12 ]] && CGRP_DIR=p1/c12
+ [[ $CGRP_DIR = c21 ]] && CGRP_DIR=p2/c21
+ [[ $CGRP_DIR = c22 ]] && CGRP_DIR=p2/c22
+}
+
+#
# Check effective cpus
-# $1 - check string, format: <cgroup>:<cpu-list>[,<cgroup>:<cpu-list>]*
+# $1 - check string, format: <cgroup>:<cpu-list>[|<cgroup>:<cpu-list>]*
#
check_effective_cpus()
{
CHK_STR=$1
- for CHK in $(echo $CHK_STR | sed -e "s/,/ /g")
+ for CHK in $(echo $CHK_STR | sed -e "s/|/ /g")
do
set -- $(echo $CHK | sed -e "s/:/ /g")
CGRP=$1
- CPUS=$2
+ EXPECTED_CPUS=$2
+ ACTUAL_CPUS=
if [[ $CGRP = X* ]]
then
CGRP=${CGRP#X}
@@ -584,41 +676,39 @@ check_effective_cpus()
else
FILE=cpuset.cpus.effective
fi
- [[ $CGRP = A2 ]] && CGRP=A1/A2
- [[ $CGRP = A3 ]] && CGRP=A1/A2/A3
- [[ -e $CGRP/$FILE ]] || return 1
- [[ $CPUS = $(cat $CGRP/$FILE) ]] || return 1
+ set_cgroup_dir $CGRP
+ [[ -e $CGRP_DIR/$FILE ]] || return 1
+ ACTUAL_CPUS=$(cat $CGRP_DIR/$FILE)
+ [[ $EXPECTED_CPUS = $ACTUAL_CPUS ]] || return 1
done
}
#
# Check cgroup states
-# $1 - check string, format: <cgroup>:<state>[,<cgroup>:<state>]*
+# $1 - check string, format: <cgroup>:<state>[|<cgroup>:<state>]*
#
check_cgroup_states()
{
CHK_STR=$1
- for CHK in $(echo $CHK_STR | sed -e "s/,/ /g")
+ for CHK in $(echo $CHK_STR | sed -e "s/|/ /g")
do
set -- $(echo $CHK | sed -e "s/:/ /g")
CGRP=$1
- CGRP_DIR=$CGRP
- STATE=$2
+ EXPECTED_STATE=$2
FILE=
- EVAL=$(expr substr $STATE 2 2)
- [[ $CGRP = A2 ]] && CGRP_DIR=A1/A2
- [[ $CGRP = A3 ]] && CGRP_DIR=A1/A2/A3
+ EVAL=$(expr substr $EXPECTED_STATE 2 2)
- case $STATE in
+ set_cgroup_dir $CGRP
+ case $EXPECTED_STATE in
P*) FILE=$CGRP_DIR/cpuset.cpus.partition
;;
- *) echo "Unknown state: $STATE!"
+ *) echo "Unknown state: $EXPECTED_STATE!"
exit 1
;;
esac
- VAL=$(cat $FILE)
+ ACTUAL_STATE=$(cat $FILE)
- case "$VAL" in
+ case "$ACTUAL_STATE" in
member) VAL=0
;;
root) VAL=1
@@ -642,7 +732,7 @@ check_cgroup_states()
[[ $VAL -eq 1 && $VERBOSE -gt 0 ]] && {
DOMS=$(cat $CGRP_DIR/cpuset.cpus.effective)
[[ -n "$DOMS" ]] &&
- echo " [$CGRP] sched-domain: $DOMS" > $CONSOLE
+ echo " [$CGRP_DIR] sched-domain: $DOMS" > $CONSOLE
}
done
return 0
@@ -665,22 +755,22 @@ check_cgroup_states()
#
check_isolcpus()
{
- EXPECT_VAL=$1
- ISOLCPUS=
+ EXPECTED_ISOLCPUS=$1
+ ISCPUS=${CGROUP2}/cpuset.cpus.isolated
+ ISOLCPUS=$(cat $ISCPUS)
LASTISOLCPU=
SCHED_DOMAINS=/sys/kernel/debug/sched/domains
- ISCPUS=${CGROUP2}/cpuset.cpus.isolated
- if [[ $EXPECT_VAL = . ]]
+ if [[ $EXPECTED_ISOLCPUS = . ]]
then
- EXPECT_VAL=
- EXPECT_VAL2=
- elif [[ $(expr $EXPECT_VAL : ".*,.*") > 0 ]]
+ EXPECTED_ISOLCPUS=
+ EXPECTED_SDOMAIN=
+ elif [[ $(expr $EXPECTED_ISOLCPUS : ".*|.*") > 0 ]]
then
- set -- $(echo $EXPECT_VAL | sed -e "s/,/ /g")
- EXPECT_VAL=$1
- EXPECT_VAL2=$2
+ set -- $(echo $EXPECTED_ISOLCPUS | sed -e "s/|/ /g")
+ EXPECTED_ISOLCPUS=$2
+ EXPECTED_SDOMAIN=$1
else
- EXPECT_VAL2=$EXPECT_VAL
+ EXPECTED_SDOMAIN=$EXPECTED_ISOLCPUS
fi
#
@@ -689,20 +779,21 @@ check_isolcpus()
# to make appending those CPUs easier.
#
[[ -n "$BOOT_ISOLCPUS" ]] && {
- EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS}
- EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS}
+ EXPECTED_ISOLCPUS=${EXPECTED_ISOLCPUS:+${EXPECTED_ISOLCPUS},}${BOOT_ISOLCPUS}
+ EXPECTED_SDOMAIN=${EXPECTED_SDOMAIN:+${EXPECTED_SDOMAIN},}${BOOT_ISOLCPUS}
}
#
# Check cpuset.cpus.isolated cpumask
#
- [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && {
+ [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && {
# Take a 50ms pause and try again
pause 0.05
ISOLCPUS=$(cat $ISCPUS)
}
- [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1
+ [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && return 1
ISOLCPUS=
+ EXPECTED_ISOLCPUS=$EXPECTED_SDOMAIN
#
# Use the sched domain in debugfs to check isolated CPUs, if available
@@ -736,7 +827,7 @@ check_isolcpus()
done
[[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
- [[ "$EXPECT_VAL" = "$ISOLCPUS" ]]
+ [[ "$EXPECTED_SDOMAIN" = "$ISOLCPUS" ]]
}
test_fail()
@@ -774,6 +865,63 @@ null_isolcpus_check()
}
#
+# Check state transition test result
+# $1 - Test number
+# $2 - Expected effective CPU values
+# $3 - Expected partition states
+# $4 - Expected isolated CPUs
+#
+check_test_results()
+{
+ _NR=$1
+ _ECPUS="$2"
+ _PSTATES="$3"
+ _ISOLCPUS="$4"
+
+ [[ -n "$_ECPUS" && "$_ECPUS" != . ]] && {
+ check_effective_cpus $_ECPUS
+ [[ $? -ne 0 ]] && test_fail $_NR "effective CPU" \
+ "Cgroup $CGRP: expected $EXPECTED_CPUS, got $ACTUAL_CPUS"
+ }
+
+ [[ -n "$_PSTATES" && "$_PSTATES" != . ]] && {
+ check_cgroup_states $_PSTATES
+ [[ $? -ne 0 ]] && test_fail $_NR states \
+ "Cgroup $CGRP: expected $EXPECTED_STATE, got $ACTUAL_STATE"
+ }
+
+ # Compare the expected isolated CPUs with the actual ones,
+ # if available
+ [[ -n "$_ISOLCPUS" ]] && {
+ check_isolcpus $_ISOLCPUS
+ [[ $? -ne 0 ]] && {
+ [[ -n "$BOOT_ISOLCPUS" ]] && _ISOLCPUS=${_ISOLCPUS},${BOOT_ISOLCPUS}
+ test_fail $_NR "isolated CPU" \
+ "Expect $_ISOLCPUS, get $ISOLCPUS instead"
+ }
+ }
+ reset_cgroup_states
+ #
+ # Check to see if effective cpu list changes
+ #
+ _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective)
+ RETRY=0
+ while [[ $_NEWLIST != $CPULIST && $RETRY -lt 8 ]]
+ do
+ # Wait a bit longer & recheck a few times
+ pause 0.02
+ ((RETRY++))
+ _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective)
+ done
+ [[ $_NEWLIST != $CPULIST ]] && {
+ echo "Effective cpus changed to $_NEWLIST after test $_NR!"
+ exit 1
+ }
+ null_isolcpus_check
+ [[ $VERBOSE -gt 0 ]] && echo "Test $I done."
+}
+
+#
# Run cpuset state transition test
# $1 - test matrix name
#
@@ -785,6 +933,8 @@ run_state_test()
{
TEST=$1
CONTROLLER=cpuset
+ CGROUP_LIST=". A1 A1/A2 A1/A2/A3 B1"
+ RESET_LIST="A1/A2/A3 A1/A2 A1 B1"
I=0
eval CNT="\${#$TEST[@]}"
@@ -812,10 +962,11 @@ run_state_test()
STATES=${11}
ICPUS=${12}
- set_ctrl_state_noerr B1 $OLD_B1
set_ctrl_state_noerr A1 $OLD_A1
set_ctrl_state_noerr A1/A2 $OLD_A2
set_ctrl_state_noerr A1/A2/A3 $OLD_A3
+ set_ctrl_state_noerr B1 $OLD_B1
+
RETVAL=0
set_ctrl_state A1 $NEW_A1; ((RETVAL += $?))
set_ctrl_state A1/A2 $NEW_A2; ((RETVAL += $?))
@@ -824,47 +975,79 @@ run_state_test()
[[ $RETVAL -ne $RESULT ]] && test_fail $I result
- [[ -n "$ECPUS" && "$ECPUS" != . ]] && {
- check_effective_cpus $ECPUS
- [[ $? -ne 0 ]] && test_fail $I "effective CPU"
- }
+ check_test_results $I "$ECPUS" "$STATES" "$ICPUS"
+ ((I++))
+ done
+ echo "All $I tests of $TEST PASSED."
+}
- [[ -n "$STATES" && "$STATES" != . ]] && {
- check_cgroup_states $STATES
- [[ $? -ne 0 ]] && test_fail $I states
- }
+#
+# Run cpuset remote partition state transition test
+# $1 - test matrix name
+#
+run_remote_state_test()
+{
+ TEST=$1
+ CONTROLLER=cpuset
+ [[ -d rtest ]] || mkdir rtest
+ cd rtest
+ echo +cpuset > cgroup.subtree_control
+ echo "1-7" > cpuset.cpus
+ echo "1-7" > cpuset.cpus.exclusive
+ CGROUP_LIST=".. . p1 p2 p1/c11 p1/c12 p2/c21 p2/c22"
+ RESET_LIST="p1/c11 p1/c12 p2/c21 p2/c22 p1 p2"
+ I=0
+ eval CNT="\${#$TEST[@]}"
- # Compare the expected isolated CPUs with the actual ones,
- # if available
- [[ -n "$ICPUS" ]] && {
- check_isolcpus $ICPUS
- [[ $? -ne 0 ]] && {
- [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS}
- test_fail $I "isolated CPU" \
- "Expect $ICPUS, get $ISOLCPUS instead"
- }
- }
- reset_cgroup_states
- #
- # Check to see if effective cpu list changes
- #
- NEWLIST=$(cat cpuset.cpus.effective)
- RETRY=0
- while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]]
- do
- # Wait a bit longer & recheck a few times
- pause 0.02
- ((RETRY++))
- NEWLIST=$(cat cpuset.cpus.effective)
- done
- [[ $NEWLIST != $CPULIST ]] && {
- echo "Effective cpus changed to $NEWLIST after test $I!"
- exit 1
+ reset_cgroup_states
+ console_msg "Running remote partition state transition test ..."
+
+ while [[ $I -lt $CNT ]]
+ do
+ echo "Running test $I ..." > $CONSOLE
+ [[ $VERBOSE -gt 1 ]] && {
+ echo ""
+ eval echo \${$TEST[$I]}
}
- null_isolcpus_check
- [[ $VERBOSE -gt 0 ]] && echo "Test $I done."
+ eval set -- "\${$TEST[$I]}"
+ OLD_p1=$1
+ OLD_p2=$2
+ OLD_c11=$3
+ OLD_c12=$4
+ OLD_c21=$5
+ OLD_c22=$6
+ NEW_p1=$7
+ NEW_p2=$8
+ NEW_c11=$9
+ NEW_c12=${10}
+ NEW_c21=${11}
+ NEW_c22=${12}
+ ECPUS=${13}
+ STATES=${14}
+ ICPUS=${15}
+
+ set_ctrl_state_noerr p1 $OLD_p1
+ set_ctrl_state_noerr p2 $OLD_p2
+ set_ctrl_state_noerr p1/c11 $OLD_c11
+ set_ctrl_state_noerr p1/c12 $OLD_c12
+ set_ctrl_state_noerr p2/c21 $OLD_c21
+ set_ctrl_state_noerr p2/c22 $OLD_c22
+
+ RETVAL=0
+ set_ctrl_state p1 $NEW_p1 ; ((RETVAL += $?))
+ set_ctrl_state p2 $NEW_p2 ; ((RETVAL += $?))
+ set_ctrl_state p1/c11 $NEW_c11; ((RETVAL += $?))
+ set_ctrl_state p1/c12 $NEW_c12; ((RETVAL += $?))
+ set_ctrl_state p2/c21 $NEW_c21; ((RETVAL += $?))
+ set_ctrl_state p2/c22 $NEW_c22; ((RETVAL += $?))
+
+ [[ $RETVAL -ne 0 ]] && test_fail $I result
+
+ check_test_results $I "$ECPUS" "$STATES" "$ICPUS"
((I++))
done
+ cd ..
+ rmdir rtest
echo "All $I tests of $TEST PASSED."
}
@@ -932,6 +1115,7 @@ test_isolated()
echo $$ > $CGROUP2/cgroup.procs
[[ -d A1 ]] && rmdir A1
null_isolcpus_check
+ pause 0.05
}
#
@@ -997,10 +1181,13 @@ test_inotify()
else
echo "Inotify test PASSED"
fi
+ echo member > cpuset.cpus.partition
+ echo "" > cpuset.cpus
}
trap cleanup 0 2 3 6
run_state_test TEST_MATRIX
+run_remote_state_test REMOTE_TEST_MATRIX
test_isolated
test_inotify
echo "All tests PASSED."
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
index 3d2663fe50ba..eeca8005723f 100644
--- a/tools/testing/selftests/clone3/clone3_selftests.h
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -16,7 +16,7 @@
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
#ifndef __NR_clone3
-#define __NR_clone3 -1
+#define __NR_clone3 435
#endif
struct __clone_args {
diff --git a/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py
index 7cc74faed743..7c90a040ce45 100755
--- a/tools/testing/selftests/drivers/net/hds.py
+++ b/tools/testing/selftests/drivers/net/hds.py
@@ -6,7 +6,7 @@ import os
from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx
from lib.py import CmdExitFailure, EthtoolFamily, NlError
from lib.py import NetDrvEnv
-from lib.py import defer, ethtool, ip
+from lib.py import defer, ethtool, ip, random
def _get_hds_mode(cfg, netnl) -> str:
@@ -20,7 +20,7 @@ def _get_hds_mode(cfg, netnl) -> str:
def _xdp_onoff(cfg):
- prog = cfg.rpath("../../net/lib/xdp_dummy.bpf.o")
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
ip("link set dev %s xdp obj %s sec xdp" %
(cfg.ifname, prog))
ip("link set dev %s xdp off" % cfg.ifname)
@@ -109,6 +109,36 @@ def set_hds_thresh_zero(cfg, netnl) -> None:
ksft_eq(0, rings['hds-thresh'])
+def set_hds_thresh_random(cfg, netnl) -> None:
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+ if 'hds-thresh-max' not in rings:
+ raise KsftSkipEx('hds-thresh-max not defined by device')
+
+ if rings['hds-thresh-max'] < 2:
+ raise KsftSkipEx('hds-thresh-max is too small')
+ elif rings['hds-thresh-max'] == 2:
+ hds_thresh = 1
+ else:
+ while True:
+ hds_thresh = random.randint(1, rings['hds-thresh-max'] - 1)
+ if hds_thresh != rings['hds-thresh']:
+ break
+
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_thresh})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("hds-thresh-set not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ ksft_eq(hds_thresh, rings['hds-thresh'])
+
def set_hds_thresh_max(cfg, netnl) -> None:
try:
rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
@@ -243,6 +273,7 @@ def main() -> None:
get_hds_thresh,
set_hds_disable,
set_hds_enable,
+ set_hds_thresh_random,
set_hds_thresh_zero,
set_hds_thresh_max,
set_hds_thresh_gt,
diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py
index 701aca1361e0..cd23af875317 100755
--- a/tools/testing/selftests/drivers/net/hw/csum.py
+++ b/tools/testing/selftests/drivers/net/hw/csum.py
@@ -88,7 +88,7 @@ def main() -> None:
with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
check_nic_features(cfg)
- cfg.bin_local = cfg.rpath("../../../net/lib/csum")
+ cfg.bin_local = cfg.net_lib_dir / "csum"
cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
cases = []
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
index d301d9b356f7..6a0378e06cab 100755
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -27,7 +27,7 @@ def _set_flow_rule(cfg, chan):
def test_zcrx(cfg) -> None:
- cfg.require_v6()
+ cfg.require_ipver('6')
combined_chans = _get_combined_channels(cfg)
if combined_chans < 2:
@@ -35,12 +35,13 @@ def test_zcrx(cfg) -> None:
rx_ring = _get_rx_ring_entries(cfg)
try:
+ ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1}"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_v6} -p 9999 -l 12840"
+ tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 12840"
with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
wait_port_listen(9999, proto="tcp", host=cfg.remote)
cmd(tx_cmd)
@@ -48,10 +49,11 @@ def test_zcrx(cfg) -> None:
ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
ethtool(f"-X {cfg.ifname} default", host=cfg.remote)
ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
def test_zcrx_oneshot(cfg) -> None:
- cfg.require_v6()
+ cfg.require_ipver('6')
combined_chans = _get_combined_channels(cfg)
if combined_chans < 2:
@@ -59,12 +61,13 @@ def test_zcrx_oneshot(cfg) -> None:
rx_ring = _get_rx_ring_entries(cfg)
try:
+ ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1} -o 4"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_v6} -p 9999 -l 4096 -z 16384"
+ tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 4096 -z 16384"
with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
wait_port_listen(9999, proto="tcp", host=cfg.remote)
cmd(tx_cmd)
@@ -72,6 +75,7 @@ def test_zcrx_oneshot(cfg) -> None:
ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
ethtool(f"-X {cfg.ifname} default", host=cfg.remote)
ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+ ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
def main() -> None:
diff --git a/tools/testing/selftests/drivers/net/hw/irq.py b/tools/testing/selftests/drivers/net/hw/irq.py
index 42ab98370245..0699d6a8b4e2 100755
--- a/tools/testing/selftests/drivers/net/hw/irq.py
+++ b/tools/testing/selftests/drivers/net/hw/irq.py
@@ -69,7 +69,7 @@ def check_reconfig_queues(cfg) -> None:
def check_reconfig_xdp(cfg) -> None:
def reconfig(cfg) -> None:
ip(f"link set dev %s xdp obj %s sec xdp" %
- (cfg.ifname, cfg.rpath("xdp_dummy.bpf.o")))
+ (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o"))
ip(f"link set dev %s xdp off" % cfg.ifname)
_check_reconfig(cfg, reconfig)
diff --git a/tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c b/tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c
deleted file mode 100644
index d988b2e0cee8..000000000000
--- a/tools/testing/selftests/drivers/net/hw/xdp_dummy.bpf.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#define KBUILD_MODNAME "xdp_dummy"
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-SEC("xdp")
-int xdp_dummy_prog(struct xdp_md *ctx)
-{
- return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index fd4d674e6c72..ad5ff645183a 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -13,22 +13,17 @@ from .remote import Remote
class NetDrvEnvBase:
"""
Base class for a NIC / host envirnoments
+
+ Attributes:
+ test_dir: Path to the source directory of the test
+ net_lib_dir: Path to the net/lib directory
"""
def __init__(self, src_path):
- self.src_path = src_path
- self.env = self._load_env_file()
-
- def rpath(self, path):
- """
- Get an absolute path to a file based on a path relative to the directory
- containing the test which constructed env.
+ self.src_path = Path(src_path)
+ self.test_dir = self.src_path.parent.resolve()
+ self.net_lib_dir = (Path(__file__).parent / "../../../../net/lib").resolve()
- For example, if the test.py is in the same directory as
- a binary (built from helper.c), the test can use env.rpath("helper")
- to get the absolute path to the binary
- """
- src_dir = Path(self.src_path).parent.resolve()
- return (src_dir / path).as_posix()
+ self.env = self._load_env_file()
def _load_env_file(self):
env = os.environ.copy()
diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
index bed748dde4b0..8972f42dfe03 100755
--- a/tools/testing/selftests/drivers/net/ocelot/psfp.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
@@ -266,18 +266,14 @@ run_test()
"${base_time}" \
"${CYCLE_TIME_NS}" \
"${SHIFT_TIME_NS}" \
+ "${GATE_DURATION_NS}" \
"${NUM_PKTS}" \
"${STREAM_VID}" \
"${STREAM_PRIO}" \
"" \
"${isochron_dat}"
- # Count all received packets by looking at the non-zero RX timestamps
- received=$(isochron report \
- --input-file "${isochron_dat}" \
- --printf-format "%u\n" --printf-args "R" | \
- grep -w -v '0' | wc -l)
-
+ received=$(isochron_report_num_received "${isochron_dat}")
if [ "${received}" = "${expected}" ]; then
RET=0
else
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
index 93120e86e102..af8df2313a3b 100755
--- a/tools/testing/selftests/drivers/net/ping.py
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -9,11 +9,11 @@ from lib.py import EthtoolFamily, NetDrvEpEnv
from lib.py import bkg, cmd, wait_port_listen, rand_port
from lib.py import defer, ethtool, ip
-remote_ifname=""
no_sleep=False
def _test_v4(cfg) -> None:
- cfg.require_ipver("4")
+ if not cfg.addr_v["4"]:
+ return
cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"])
cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote)
@@ -21,7 +21,8 @@ def _test_v4(cfg) -> None:
cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote)
def _test_v6(cfg) -> None:
- cfg.require_ipver("6")
+ if not cfg.addr_v["6"]:
+ return
cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"])
cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote)
@@ -56,9 +57,8 @@ def _set_offload_checksum(cfg, netnl, on) -> None:
return
def _set_xdp_generic_sb_on(cfg) -> None:
- test_dir = os.path.dirname(os.path.realpath(__file__))
- prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
- cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True)
defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off")
@@ -66,10 +66,9 @@ def _set_xdp_generic_sb_on(cfg) -> None:
time.sleep(10)
def _set_xdp_generic_mb_on(cfg) -> None:
- test_dir = os.path.dirname(os.path.realpath(__file__))
- prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
- cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote)
- defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote)
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog))
defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off")
@@ -77,9 +76,8 @@ def _set_xdp_generic_mb_on(cfg) -> None:
time.sleep(10)
def _set_xdp_native_sb_on(cfg) -> None:
- test_dir = os.path.dirname(os.path.realpath(__file__))
- prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
- cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True)
defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
@@ -95,10 +93,9 @@ def _set_xdp_native_sb_on(cfg) -> None:
time.sleep(10)
def _set_xdp_native_mb_on(cfg) -> None:
- test_dir = os.path.dirname(os.path.realpath(__file__))
- prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
- cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote)
- defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote)
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote)
try:
cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True)
defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
@@ -109,26 +106,22 @@ def _set_xdp_native_mb_on(cfg) -> None:
time.sleep(10)
def _set_xdp_offload_on(cfg) -> None:
- test_dir = os.path.dirname(os.path.realpath(__file__))
- prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ prog = cfg.net_lib_dir / "xdp_dummy.bpf.o"
cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True)
try:
cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True)
except Exception as e:
raise KsftSkipEx('device does not support offloaded XDP')
defer(ip, f"link set dev {cfg.ifname} xdpoffload off")
- cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
if no_sleep != True:
time.sleep(10)
def get_interface_info(cfg) -> None:
- global remote_ifname
global no_sleep
- remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_addr_v['4']} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout
- remote_ifname = remote_info.rstrip('\n')
- if remote_ifname == "":
+ if cfg.remote_ifname == "":
raise KsftFailEx('Can not get remote interface')
local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim":
@@ -141,15 +134,25 @@ def set_interface_init(cfg) -> None:
cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True)
cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True)
cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True)
- cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+def test_default_v4(cfg, netnl) -> None:
+ cfg.require_ipver("4")
-def test_default(cfg, netnl) -> None:
_set_offload_checksum(cfg, netnl, "off")
_test_v4(cfg)
- _test_v6(cfg)
_test_tcp(cfg)
_set_offload_checksum(cfg, netnl, "on")
_test_v4(cfg)
+ _test_tcp(cfg)
+
+def test_default_v6(cfg, netnl) -> None:
+ cfg.require_ipver("6")
+
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
_test_v6(cfg)
_test_tcp(cfg)
@@ -207,7 +210,8 @@ def main() -> None:
with NetDrvEpEnv(__file__) as cfg:
get_interface_info(cfg)
set_interface_init(cfg)
- ksft_run([test_default,
+ ksft_run([test_default_v4,
+ test_default_v6,
test_xdp_generic_sb,
test_xdp_generic_mb,
test_xdp_native_sb,
diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
index cae923f84f69..06abd3f233e1 100755
--- a/tools/testing/selftests/drivers/net/queues.py
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -26,13 +26,13 @@ def nl_get_queues(cfg, nl, qtype='rx'):
def check_xsk(cfg, nl, xdp_queue_id=0) -> None:
# Probe for support
- xdp = cmd(cfg.rpath("xdp_helper") + ' - -', fail=False)
+ xdp = cmd(f'{cfg.test_dir / "xdp_helper"} - -', fail=False)
if xdp.ret == 255:
raise KsftSkipEx('AF_XDP unsupported')
elif xdp.ret > 0:
raise KsftFailEx('unable to create AF_XDP socket')
- with bkg(f'{cfg.rpath("xdp_helper")} {cfg.ifindex} {xdp_queue_id}',
+ with bkg(f'{cfg.test_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}',
ksft_wait=3):
rx = tx = False
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
index 4a2d5c454fd1..59a71f22fb11 100644
--- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
@@ -48,8 +48,16 @@ static uint64_t get_mnt_id(struct __test_metadata *const _metadata,
static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
+static const int mark_cmds[] = {
+ FAN_MARK_ADD,
+ FAN_MARK_REMOVE,
+ FAN_MARK_FLUSH
+};
+
+#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds)
+
FIXTURE(fanotify) {
- int fan_fd;
+ int fan_fd[NUM_FAN_FDS];
char buf[256];
unsigned int rem;
void *next;
@@ -61,7 +69,7 @@ FIXTURE(fanotify) {
FIXTURE_SETUP(fanotify)
{
- int ret;
+ int i, ret;
ASSERT_EQ(unshare(CLONE_NEWNS), 0);
@@ -89,20 +97,34 @@ FIXTURE_SETUP(fanotify)
self->root_id = get_mnt_id(_metadata, "/");
ASSERT_NE(self->root_id, 0);
- self->fan_fd = fanotify_init(FAN_REPORT_MNT, 0);
- ASSERT_GE(self->fan_fd, 0);
-
- ret = fanotify_mark(self->fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS,
- FAN_MNT_ATTACH | FAN_MNT_DETACH, self->ns_fd, NULL);
- ASSERT_EQ(ret, 0);
+ for (i = 0; i < NUM_FAN_FDS; i++) {
+ self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK,
+ 0);
+ ASSERT_GE(self->fan_fd[i], 0);
+ ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ // On fd[0] we do an extra ADD that changes nothing.
+ // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark.
+ ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ }
self->rem = 0;
}
FIXTURE_TEARDOWN(fanotify)
{
+ int i;
+
ASSERT_EQ(self->rem, 0);
- close(self->fan_fd);
+ for (i = 0; i < NUM_FAN_FDS; i++)
+ close(self->fan_fd[i]);
ASSERT_EQ(fchdir(self->orig_root), 0);
@@ -123,8 +145,21 @@ static uint64_t expect_notify(struct __test_metadata *const _metadata,
unsigned int thislen;
if (!self->rem) {
- ssize_t len = read(self->fan_fd, self->buf, sizeof(self->buf));
- ASSERT_GT(len, 0);
+ ssize_t len;
+ int i;
+
+ for (i = NUM_FAN_FDS - 1; i >= 0; i--) {
+ len = read(self->fan_fd[i], self->buf,
+ sizeof(self->buf));
+ if (i > 0) {
+ // Groups 1,2 should get EAGAIN
+ ASSERT_EQ(len, -1);
+ ASSERT_EQ(errno, EAGAIN);
+ } else {
+ // Group 0 should get events
+ ASSERT_GT(len, 0);
+ }
+ }
self->rem = len;
self->next = (void *) self->buf;
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc
index 6b94b678741a..f656bccb1a14 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc
@@ -7,11 +7,32 @@
MAX_ARGS=128
EXCEED_ARGS=$((MAX_ARGS + 1))
+# bash and dash evaluate variables differently.
+# dash will evaluate '\\' every time it is read whereas bash does not.
+#
+# TEST_STRING="$TEST_STRING \\$i"
+# echo $TEST_STRING
+#
+# With i=123
+# On bash, that will print "\123"
+# but on dash, that will print the escape sequence of \123 as the \ will
+# be interpreted again in the echo.
+#
+# Set a variable "bs" to save a double backslash, then echo that
+# to "ts" to see if $ts changed or not. If it changed, it's dash,
+# if not, it's bash, and then bs can equal a single backslash.
+bs='\\'
+ts=`echo $bs`
+if [ "$ts" = '\\' ]; then
+ # this is bash
+ bs='\'
+fi
+
check_max_args() { # event_header
TEST_STRING=$1
# Acceptable
for i in `seq 1 $MAX_ARGS`; do
- TEST_STRING="$TEST_STRING \\$i"
+ TEST_STRING="$TEST_STRING $bs$i"
done
echo "$TEST_STRING" >> dynamic_events
echo > dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
index 118247b8dd84..c62165fabd0c 100644
--- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
+++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
@@ -80,6 +80,26 @@ if [ $misscnt -gt 0 ]; then
exit_fail
fi
+# Check strings too
+if [ -f events/syscalls/sys_enter_openat/filter ]; then
+ DIRNAME=`basename $TMPDIR`
+ echo "filename.ustring ~ \"*$DIRNAME*\"" > events/syscalls/sys_enter_openat/filter
+ echo 1 > events/syscalls/sys_enter_openat/enable
+ echo 1 > tracing_on
+ ls /bin/sh
+ nocnt=`grep openat trace | wc -l`
+ ls $TMPDIR
+ echo 0 > tracing_on
+ hitcnt=`grep openat trace | wc -l`;
+ echo 0 > events/syscalls/sys_enter_openat/enable
+ if [ $nocnt -gt 0 ]; then
+ exit_fail
+ fi
+ if [ $hitcnt -eq 0 ]; then
+ exit_fail
+ fi
+fi
+
reset_events_filter
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc
new file mode 100644
index 000000000000..b6d6a312ead5
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc
@@ -0,0 +1,177 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function graph filters
+# requires: set_ftrace_filter function_graph:tracer
+
+# Make sure that function graph filtering works
+
+INSTANCE1="instances/test1_$$"
+INSTANCE2="instances/test2_$$"
+
+WD=`pwd`
+
+do_reset() {
+ cd $WD
+ if [ -d $INSTANCE1 ]; then
+ echo nop > $INSTANCE1/current_tracer
+ rmdir $INSTANCE1
+ fi
+ if [ -d $INSTANCE2 ]; then
+ echo nop > $INSTANCE2/current_tracer
+ rmdir $INSTANCE2
+ fi
+}
+
+mkdir $INSTANCE1
+if ! grep -q function_graph $INSTANCE1/available_tracers; then
+ echo "function_graph not allowed with instances"
+ rmdir $INSTANCE1
+ exit_unsupported
+fi
+
+mkdir $INSTANCE2
+
+fail() { # msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+disable_tracing
+clear_trace
+
+function_count() {
+ search=$1
+ vsearch=$2
+
+ if [ -z "$search" ]; then
+ cat enabled_functions | wc -l
+ elif [ -z "$vsearch" ]; then
+ grep $search enabled_functions | wc -l
+ else
+ grep $search enabled_functions | grep $vsearch| wc -l
+ fi
+}
+
+set_fgraph() {
+ instance=$1
+ filter="$2"
+ notrace="$3"
+
+ echo "$filter" > $instance/set_ftrace_filter
+ echo "$notrace" > $instance/set_ftrace_notrace
+ echo function_graph > $instance/current_tracer
+}
+
+check_functions() {
+ orig_cnt=$1
+ test=$2
+
+ cnt=`function_count $test`
+ if [ $cnt -gt $orig_cnt ]; then
+ fail
+ fi
+}
+
+check_cnt() {
+ orig_cnt=$1
+ search=$2
+ vsearch=$3
+
+ cnt=`function_count $search $vsearch`
+ if [ $cnt -gt $orig_cnt ]; then
+ fail
+ fi
+}
+
+reset_graph() {
+ instance=$1
+ echo nop > $instance/current_tracer
+}
+
+# get any functions that were enabled before the test
+total_cnt=`function_count`
+sched_cnt=`function_count sched`
+lock_cnt=`function_count lock`
+time_cnt=`function_count time`
+clock_cnt=`function_count clock`
+locks_clock_cnt=`function_count locks clock`
+clock_locks_cnt=`function_count clock locks`
+
+# Trace functions with "sched" but not "time"
+set_fgraph $INSTANCE1 '*sched*' '*time*'
+
+# Make sure "time" isn't listed
+check_functions $time_cnt 'time'
+instance1_cnt=`function_count`
+
+# Trace functions with "lock" but not "clock"
+set_fgraph $INSTANCE2 '*lock*' '*clock*'
+instance1_2_cnt=`function_count`
+
+# Turn off the first instance
+reset_graph $INSTANCE1
+
+# The second instance doesn't trace "clock" functions
+check_functions $clock_cnt 'clock'
+instance2_cnt=`function_count`
+
+# Start from a clean slate
+reset_graph $INSTANCE2
+check_functions $total_cnt
+
+# Trace functions with "lock" but not "clock"
+set_fgraph $INSTANCE2 '*lock*' '*clock*'
+
+# This should match the last time instance 2 was by itself
+cnt=`function_count`
+if [ $instance2_cnt -ne $cnt ]; then
+ fail
+fi
+
+# And it should not be tracing "clock" functions
+check_functions $clock_cnt 'clock'
+
+# Trace functions with "sched" but not "time"
+set_fgraph $INSTANCE1 '*sched*' '*time*'
+
+# This should match the last time both instances were enabled
+cnt=`function_count`
+if [ $instance1_2_cnt -ne $cnt ]; then
+ fail
+fi
+
+# Turn off the second instance
+reset_graph $INSTANCE2
+
+# This should match the last time instance 1 was by itself
+cnt=`function_count`
+if [ $instance1_cnt -ne $cnt ]; then
+ fail
+fi
+
+# And it should not be tracing "time" functions
+check_functions $time_cnt 'time'
+
+# Start from a clean slate
+reset_graph $INSTANCE1
+check_functions $total_cnt
+
+# Enable all functions but those that have "locks"
+set_fgraph $INSTANCE1 '' '*locks*'
+
+# Enable all functions but those that have "clock"
+set_fgraph $INSTANCE2 '' '*clock*'
+
+# If a function has "locks" it should not have "clock"
+check_cnt $locks_clock_cnt locks clock
+
+# If a function has "clock" it should not have "locks"
+check_cnt $clock_locks_cnt clock locks
+
+reset_graph $INSTANCE1
+reset_graph $INSTANCE2
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
index 7d7a6a06cdb7..2d8230da9064 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -98,7 +98,7 @@ int main(int argc, char *argv[])
info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
if (!res || errno != EWOULDBLOCK) {
- ksft_test_result_pass("futex_waitv returned: %d %s\n",
+ ksft_test_result_fail("futex_waitv returned: %d %s\n",
res ? errno : res,
res ? strerror(errno) : "");
ret = RET_FAIL;
diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common
index 45b5570441ce..b1f40857307d 100644
--- a/tools/testing/selftests/hid/config.common
+++ b/tools/testing/selftests/hid/config.common
@@ -39,7 +39,6 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_STAT=y
CONFIG_CPU_IDLE_GOV_LADDER=y
CONFIG_CPUSETS=y
-CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_BLAKE2B=y
CONFIG_CRYPTO_DEV_VIRTIO=y
CONFIG_CRYPTO_SEQIV=y
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index a1b2b657999d..1a8e85afe9aa 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -342,12 +342,14 @@ FIXTURE(iommufd_ioas)
uint32_t hwpt_id;
uint32_t device_id;
uint64_t base_iova;
+ uint32_t device_pasid_id;
};
FIXTURE_VARIANT(iommufd_ioas)
{
unsigned int mock_domains;
unsigned int memory_limit;
+ bool pasid_capable;
};
FIXTURE_SETUP(iommufd_ioas)
@@ -372,6 +374,12 @@ FIXTURE_SETUP(iommufd_ioas)
IOMMU_TEST_DEV_CACHE_DEFAULT);
self->base_iova = MOCK_APERTURE_START;
}
+
+ if (variant->pasid_capable)
+ test_cmd_mock_domain_flags(self->ioas_id,
+ MOCK_FLAGS_DEVICE_PASID,
+ NULL, NULL,
+ &self->device_pasid_id);
}
FIXTURE_TEARDOWN(iommufd_ioas)
@@ -387,6 +395,7 @@ FIXTURE_VARIANT_ADD(iommufd_ioas, no_domain)
FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain)
{
.mock_domains = 1,
+ .pasid_capable = true,
};
FIXTURE_VARIANT_ADD(iommufd_ioas, two_mock_domain)
@@ -439,6 +448,10 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
&test_hwpt_id);
test_err_hwpt_alloc(EINVAL, self->device_id, self->device_id, 0,
&test_hwpt_id);
+ test_err_hwpt_alloc(EOPNOTSUPP, self->device_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT |
+ IOMMU_HWPT_FAULT_ID_VALID,
+ &test_hwpt_id);
test_cmd_hwpt_alloc(self->device_id, self->ioas_id,
IOMMU_HWPT_ALLOC_NEST_PARENT,
@@ -748,6 +761,8 @@ TEST_F(iommufd_ioas, get_hw_info)
} buffer_smaller;
if (self->device_id) {
+ uint8_t max_pasid = 0;
+
/* Provide a zero-size user_buffer */
test_cmd_get_hw_info(self->device_id, NULL, 0);
/* Provide a user_buffer with exact size */
@@ -762,6 +777,13 @@ TEST_F(iommufd_ioas, get_hw_info)
* the fields within the size range still gets updated.
*/
test_cmd_get_hw_info(self->device_id, &buffer_smaller, sizeof(buffer_smaller));
+ test_cmd_get_hw_info_pasid(self->device_id, &max_pasid);
+ ASSERT_EQ(0, max_pasid);
+ if (variant->pasid_capable) {
+ test_cmd_get_hw_info_pasid(self->device_pasid_id,
+ &max_pasid);
+ ASSERT_EQ(MOCK_PASID_WIDTH, max_pasid);
+ }
} else {
test_err_get_hw_info(ENOENT, self->device_id,
&buffer_exact, sizeof(buffer_exact));
@@ -2736,6 +2758,7 @@ TEST_F(iommufd_viommu, viommu_alloc_nested_iopf)
uint32_t iopf_hwpt_id;
uint32_t fault_id;
uint32_t fault_fd;
+ uint32_t vdev_id;
if (self->device_id) {
test_ioctl_fault_alloc(&fault_id, &fault_fd);
@@ -2752,6 +2775,10 @@ TEST_F(iommufd_viommu, viommu_alloc_nested_iopf)
&iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data,
sizeof(data));
+ /* Must allocate vdevice before attaching to a nested hwpt */
+ test_err_mock_domain_replace(ENOENT, self->stdev_id,
+ iopf_hwpt_id);
+ test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id);
EXPECT_ERRNO(EBUSY,
_test_ioctl_destroy(self->fd, iopf_hwpt_id));
@@ -2769,15 +2796,46 @@ TEST_F(iommufd_viommu, vdevice_alloc)
uint32_t viommu_id = self->viommu_id;
uint32_t dev_id = self->device_id;
uint32_t vdev_id = 0;
+ uint32_t veventq_id;
+ uint32_t veventq_fd;
+ int prev_seq = -1;
if (dev_id) {
+ /* Must allocate vdevice before attaching to a nested hwpt */
+ test_err_mock_domain_replace(ENOENT, self->stdev_id,
+ self->nested_hwpt_id);
+
+ /* Allocate a vEVENTQ with veventq_depth=2 */
+ test_cmd_veventq_alloc(viommu_id, IOMMU_VEVENTQ_TYPE_SELFTEST,
+ &veventq_id, &veventq_fd);
+ test_err_veventq_alloc(EEXIST, viommu_id,
+ IOMMU_VEVENTQ_TYPE_SELFTEST, NULL, NULL);
/* Set vdev_id to 0x99, unset it, and set to 0x88 */
test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id);
+ test_cmd_mock_domain_replace(self->stdev_id,
+ self->nested_hwpt_id);
+ test_cmd_trigger_vevents(dev_id, 1);
+ test_cmd_read_vevents(veventq_fd, 1, 0x99, &prev_seq);
test_err_vdevice_alloc(EEXIST, viommu_id, dev_id, 0x99,
&vdev_id);
+ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
test_ioctl_destroy(vdev_id);
+
+ /* Try again with 0x88 */
test_cmd_vdevice_alloc(viommu_id, dev_id, 0x88, &vdev_id);
+ test_cmd_mock_domain_replace(self->stdev_id,
+ self->nested_hwpt_id);
+ /* Trigger an overflow with three events */
+ test_cmd_trigger_vevents(dev_id, 3);
+ test_err_read_vevents(EOVERFLOW, veventq_fd, 3, 0x88,
+ &prev_seq);
+ /* Overflow must be gone after the previous reads */
+ test_cmd_trigger_vevents(dev_id, 1);
+ test_cmd_read_vevents(veventq_fd, 1, 0x88, &prev_seq);
+ close(veventq_fd);
+ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
test_ioctl_destroy(vdev_id);
+ test_ioctl_destroy(veventq_id);
} else {
test_err_vdevice_alloc(ENOENT, viommu_id, dev_id, 0x99, NULL);
}
@@ -2956,4 +3014,311 @@ TEST_F(iommufd_viommu, vdevice_cache)
}
}
+FIXTURE(iommufd_device_pasid)
+{
+ int fd;
+ uint32_t ioas_id;
+ uint32_t hwpt_id;
+ uint32_t stdev_id;
+ uint32_t device_id;
+ uint32_t no_pasid_stdev_id;
+ uint32_t no_pasid_device_id;
+};
+
+FIXTURE_VARIANT(iommufd_device_pasid)
+{
+ bool pasid_capable;
+};
+
+FIXTURE_SETUP(iommufd_device_pasid)
+{
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+ test_ioctl_ioas_alloc(&self->ioas_id);
+
+ test_cmd_mock_domain_flags(self->ioas_id,
+ MOCK_FLAGS_DEVICE_PASID,
+ &self->stdev_id, &self->hwpt_id,
+ &self->device_id);
+ if (!variant->pasid_capable)
+ test_cmd_mock_domain_flags(self->ioas_id, 0,
+ &self->no_pasid_stdev_id, NULL,
+ &self->no_pasid_device_id);
+}
+
+FIXTURE_TEARDOWN(iommufd_device_pasid)
+{
+ teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(iommufd_device_pasid, no_pasid)
+{
+ .pasid_capable = false,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_device_pasid, has_pasid)
+{
+ .pasid_capable = true,
+};
+
+TEST_F(iommufd_device_pasid, pasid_attach)
+{
+ struct iommu_hwpt_selftest data = {
+ .iotlb = IOMMU_TEST_IOTLB_DEFAULT,
+ };
+ uint32_t nested_hwpt_id[3] = {};
+ uint32_t parent_hwpt_id = 0;
+ uint32_t fault_id, fault_fd;
+ uint32_t s2_hwpt_id = 0;
+ uint32_t iopf_hwpt_id;
+ uint32_t pasid = 100;
+ uint32_t viommu_id;
+
+ /*
+ * Negative, detach pasid without attaching, this is not expected.
+ * But it should not result in failure anyway.
+ */
+ test_cmd_pasid_detach(pasid);
+
+ /* Allocate two nested hwpts sharing one common parent hwpt */
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT,
+ &parent_hwpt_id);
+ test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id,
+ IOMMU_HWPT_ALLOC_PASID,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST,
+ &data, sizeof(data));
+ test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id,
+ IOMMU_HWPT_ALLOC_PASID,
+ &nested_hwpt_id[1],
+ IOMMU_HWPT_DATA_SELFTEST,
+ &data, sizeof(data));
+
+ /* Fault related preparation */
+ test_ioctl_fault_alloc(&fault_id, &fault_fd);
+ test_cmd_hwpt_alloc_iopf(self->device_id, parent_hwpt_id, fault_id,
+ IOMMU_HWPT_FAULT_ID_VALID | IOMMU_HWPT_ALLOC_PASID,
+ &iopf_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+
+ /* Allocate a regular nested hwpt based on viommu */
+ test_cmd_viommu_alloc(self->device_id, parent_hwpt_id,
+ IOMMU_VIOMMU_TYPE_SELFTEST,
+ &viommu_id);
+ test_cmd_hwpt_alloc_nested(self->device_id, viommu_id,
+ IOMMU_HWPT_ALLOC_PASID,
+ &nested_hwpt_id[2],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_PASID,
+ &s2_hwpt_id);
+
+ /* Attach RID to non-pasid compat domain, */
+ test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id);
+ /* then attach to pasid should fail */
+ test_err_pasid_attach(EINVAL, pasid, s2_hwpt_id);
+
+ /* Attach RID to pasid compat domain, */
+ test_cmd_mock_domain_replace(self->stdev_id, s2_hwpt_id);
+ /* then attach to pasid should succeed, */
+ test_cmd_pasid_attach(pasid, nested_hwpt_id[0]);
+ /* but attach RID to non-pasid compat domain should fail now. */
+ test_err_mock_domain_replace(EINVAL, self->stdev_id, parent_hwpt_id);
+ /*
+ * Detach hwpt from pasid 100, and check if the pasid 100
+ * has null domain.
+ */
+ test_cmd_pasid_detach(pasid);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, 0));
+ /* RID is attached to pasid-comapt domain, pasid path is not used */
+
+ if (!variant->pasid_capable) {
+ /*
+ * PASID-compatible domain can be used by non-PASID-capable
+ * device.
+ */
+ test_cmd_mock_domain_replace(self->no_pasid_stdev_id, nested_hwpt_id[0]);
+ test_cmd_mock_domain_replace(self->no_pasid_stdev_id, self->ioas_id);
+ /*
+ * Attach hwpt to pasid 100 of non-PASID-capable device,
+ * should fail, no matter domain is pasid-comapt or not.
+ */
+ EXPECT_ERRNO(EINVAL,
+ _test_cmd_pasid_attach(self->fd, self->no_pasid_stdev_id,
+ pasid, parent_hwpt_id));
+ EXPECT_ERRNO(EINVAL,
+ _test_cmd_pasid_attach(self->fd, self->no_pasid_stdev_id,
+ pasid, s2_hwpt_id));
+ }
+
+ /*
+ * Attach non pasid compat hwpt to pasid-capable device, should
+ * fail, and have null domain.
+ */
+ test_err_pasid_attach(EINVAL, pasid, parent_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, 0));
+
+ /*
+ * Attach ioas to pasid 100, should fail, domain should
+ * be null.
+ */
+ test_err_pasid_attach(EINVAL, pasid, self->ioas_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, 0));
+
+ /*
+ * Attach the s2_hwpt to pasid 100, should succeed, domain should
+ * be valid.
+ */
+ test_cmd_pasid_attach(pasid, s2_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /*
+ * Try attach pasid 100 with another hwpt, should FAIL
+ * as attach does not allow overwrite, use REPLACE instead.
+ */
+ test_err_pasid_attach(EBUSY, pasid, nested_hwpt_id[0]);
+
+ /*
+ * Detach hwpt from pasid 100 for next test, should succeed,
+ * and have null domain.
+ */
+ test_cmd_pasid_detach(pasid);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, 0));
+
+ /*
+ * Attach nested hwpt to pasid 100, should succeed, domain
+ * should be valid.
+ */
+ test_cmd_pasid_attach(pasid, nested_hwpt_id[0]);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, nested_hwpt_id[0]));
+
+ /* Attach to pasid 100 which has been attached, should fail. */
+ test_err_pasid_attach(EBUSY, pasid, nested_hwpt_id[0]);
+
+ /* cleanup pasid 100 */
+ test_cmd_pasid_detach(pasid);
+
+ /* Replace tests */
+
+ pasid = 200;
+ /*
+ * Replace pasid 200 without attaching it, should fail
+ * with -EINVAL.
+ */
+ test_err_pasid_replace(EINVAL, pasid, s2_hwpt_id);
+
+ /*
+ * Attach the s2 hwpt to pasid 200, should succeed, domain should
+ * be valid.
+ */
+ test_cmd_pasid_attach(pasid, s2_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /*
+ * Replace pasid 200 with self->ioas_id, should fail
+ * and domain should be the prior s2 hwpt.
+ */
+ test_err_pasid_replace(EINVAL, pasid, self->ioas_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /*
+ * Replace a nested hwpt for pasid 200, should succeed,
+ * and have valid domain.
+ */
+ test_cmd_pasid_replace(pasid, nested_hwpt_id[0]);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, nested_hwpt_id[0]));
+
+ /*
+ * Replace with another nested hwpt for pasid 200, should
+ * succeed, and have valid domain.
+ */
+ test_cmd_pasid_replace(pasid, nested_hwpt_id[1]);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, nested_hwpt_id[1]));
+
+ /* cleanup pasid 200 */
+ test_cmd_pasid_detach(pasid);
+
+ /* Negative Tests for pasid replace, use pasid 1024 */
+
+ /*
+ * Attach the s2 hwpt to pasid 1024, should succeed, domain should
+ * be valid.
+ */
+ pasid = 1024;
+ test_cmd_pasid_attach(pasid, s2_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /*
+ * Replace pasid 1024 with nested_hwpt_id[0], should fail,
+ * but have the old valid domain. This is a designed
+ * negative case. Normally, this shall succeed.
+ */
+ test_err_pasid_replace(ENOMEM, pasid, nested_hwpt_id[0]);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /* cleanup pasid 1024 */
+ test_cmd_pasid_detach(pasid);
+
+ /* Attach to iopf-capable hwpt */
+
+ /*
+ * Attach an iopf hwpt to pasid 2048, should succeed, domain should
+ * be valid.
+ */
+ pasid = 2048;
+ test_cmd_pasid_attach(pasid, iopf_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, iopf_hwpt_id));
+
+ test_cmd_trigger_iopf_pasid(self->device_id, pasid, fault_fd);
+
+ /*
+ * Replace with s2_hwpt_id for pasid 2048, should
+ * succeed, and have valid domain.
+ */
+ test_cmd_pasid_replace(pasid, s2_hwpt_id);
+ ASSERT_EQ(0,
+ test_cmd_pasid_check_hwpt(self->fd, self->stdev_id,
+ pasid, s2_hwpt_id));
+
+ /* cleanup pasid 2048 */
+ test_cmd_pasid_detach(pasid);
+
+ test_ioctl_destroy(iopf_hwpt_id);
+ close(fault_fd);
+ test_ioctl_destroy(fault_id);
+
+ /* Detach the s2_hwpt_id from RID */
+ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c
index 64b1f8e1b0cf..e11ec4b121fc 100644
--- a/tools/testing/selftests/iommu/iommufd_fail_nth.c
+++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c
@@ -209,12 +209,16 @@ FIXTURE(basic_fail_nth)
{
int fd;
uint32_t access_id;
+ uint32_t stdev_id;
+ uint32_t pasid;
};
FIXTURE_SETUP(basic_fail_nth)
{
self->fd = -1;
self->access_id = 0;
+ self->stdev_id = 0;
+ self->pasid = 0; //test should use a non-zero value
}
FIXTURE_TEARDOWN(basic_fail_nth)
@@ -226,6 +230,8 @@ FIXTURE_TEARDOWN(basic_fail_nth)
rc = _test_cmd_destroy_access(self->access_id);
assert(rc == 0);
}
+ if (self->pasid && self->stdev_id)
+ _test_cmd_pasid_detach(self->fd, self->stdev_id, self->pasid);
teardown_iommufd(self->fd, _metadata);
}
@@ -620,10 +626,11 @@ TEST_FAIL_NTH(basic_fail_nth, device)
};
struct iommu_test_hw_info info;
uint32_t fault_id, fault_fd;
+ uint32_t veventq_id, veventq_fd;
uint32_t fault_hwpt_id;
+ uint32_t test_hwpt_id;
uint32_t ioas_id;
uint32_t ioas_id2;
- uint32_t stdev_id;
uint32_t idev_id;
uint32_t hwpt_id;
uint32_t viommu_id;
@@ -654,25 +661,30 @@ TEST_FAIL_NTH(basic_fail_nth, device)
fail_nth_enable();
- if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, NULL,
- &idev_id))
+ if (_test_cmd_mock_domain_flags(self->fd, ioas_id,
+ MOCK_FLAGS_DEVICE_PASID,
+ &self->stdev_id, NULL, &idev_id))
return -1;
- if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL))
+ if (_test_cmd_get_hw_info(self->fd, idev_id, &info,
+ sizeof(info), NULL, NULL))
return -1;
- if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, 0, &hwpt_id,
+ if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0,
+ IOMMU_HWPT_ALLOC_PASID, &hwpt_id,
IOMMU_HWPT_DATA_NONE, 0, 0))
return -1;
- if (_test_cmd_mock_domain_replace(self->fd, stdev_id, ioas_id2, NULL))
+ if (_test_cmd_mock_domain_replace(self->fd, self->stdev_id, ioas_id2, NULL))
return -1;
- if (_test_cmd_mock_domain_replace(self->fd, stdev_id, hwpt_id, NULL))
+ if (_test_cmd_mock_domain_replace(self->fd, self->stdev_id, hwpt_id, NULL))
return -1;
if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0,
- IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT |
+ IOMMU_HWPT_ALLOC_PASID,
+ &hwpt_id,
IOMMU_HWPT_DATA_NONE, 0, 0))
return -1;
@@ -692,6 +704,37 @@ TEST_FAIL_NTH(basic_fail_nth, device)
IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)))
return -1;
+ if (_test_cmd_veventq_alloc(self->fd, viommu_id,
+ IOMMU_VEVENTQ_TYPE_SELFTEST, &veventq_id,
+ &veventq_fd))
+ return -1;
+ close(veventq_fd);
+
+ if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0,
+ IOMMU_HWPT_ALLOC_PASID,
+ &test_hwpt_id,
+ IOMMU_HWPT_DATA_NONE, 0, 0))
+ return -1;
+
+ /* Tests for pasid attach/replace/detach */
+
+ self->pasid = 200;
+
+ if (_test_cmd_pasid_attach(self->fd, self->stdev_id,
+ self->pasid, hwpt_id)) {
+ self->pasid = 0;
+ return -1;
+ }
+
+ if (_test_cmd_pasid_replace(self->fd, self->stdev_id,
+ self->pasid, test_hwpt_id))
+ return -1;
+
+ if (_test_cmd_pasid_detach(self->fd, self->stdev_id, self->pasid))
+ return -1;
+
+ self->pasid = 0;
+
return 0;
}
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index d979f5b0efe8..72f6636e5d90 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -9,6 +9,7 @@
#include <sys/ioctl.h>
#include <stdint.h>
#include <assert.h>
+#include <poll.h>
#include "../kselftest_harness.h"
#include "../../../../drivers/iommu/iommufd/iommufd_test.h"
@@ -757,7 +758,8 @@ static void teardown_iommufd(int fd, struct __test_metadata *_metadata)
/* @data can be NULL */
static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data,
- size_t data_len, uint32_t *capabilities)
+ size_t data_len, uint32_t *capabilities,
+ uint8_t *max_pasid)
{
struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data;
struct iommu_hw_info cmd = {
@@ -802,6 +804,9 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data,
assert(!info->flags);
}
+ if (max_pasid)
+ *max_pasid = cmd.out_max_pasid_log2;
+
if (capabilities)
*capabilities = cmd.out_capabilities;
@@ -810,14 +815,19 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data,
#define test_cmd_get_hw_info(device_id, data, data_len) \
ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data, \
- data_len, NULL))
+ data_len, NULL, NULL))
#define test_err_get_hw_info(_errno, device_id, data, data_len) \
EXPECT_ERRNO(_errno, _test_cmd_get_hw_info(self->fd, device_id, data, \
- data_len, NULL))
+ data_len, NULL, NULL))
#define test_cmd_get_hw_capabilities(device_id, caps, mask) \
- ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps))
+ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, \
+ 0, &caps, NULL))
+
+#define test_cmd_get_hw_info_pasid(device_id, max_pasid) \
+ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, \
+ 0, NULL, max_pasid))
static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd)
{
@@ -842,14 +852,15 @@ static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd)
ASSERT_NE(0, *(fault_fd)); \
})
-static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd)
+static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 pasid,
+ __u32 fault_fd)
{
struct iommu_test_cmd trigger_iopf_cmd = {
.size = sizeof(trigger_iopf_cmd),
.op = IOMMU_TEST_OP_TRIGGER_IOPF,
.trigger_iopf = {
.dev_id = device_id,
- .pasid = 0x1,
+ .pasid = pasid,
.grpid = 0x2,
.perm = IOMMU_PGFAULT_PERM_READ | IOMMU_PGFAULT_PERM_WRITE,
.addr = 0xdeadbeaf,
@@ -880,7 +891,10 @@ static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd)
}
#define test_cmd_trigger_iopf(device_id, fault_fd) \
- ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, fault_fd))
+ ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, 0x1, fault_fd))
+#define test_cmd_trigger_iopf_pasid(device_id, pasid, fault_fd) \
+ ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, \
+ pasid, fault_fd))
static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id,
__u32 type, __u32 flags, __u32 *viommu_id)
@@ -936,3 +950,204 @@ static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id,
EXPECT_ERRNO(_errno, \
_test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \
virt_id, vdev_id))
+
+static int _test_cmd_veventq_alloc(int fd, __u32 viommu_id, __u32 type,
+ __u32 *veventq_id, __u32 *veventq_fd)
+{
+ struct iommu_veventq_alloc cmd = {
+ .size = sizeof(cmd),
+ .type = type,
+ .veventq_depth = 2,
+ .viommu_id = viommu_id,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_VEVENTQ_ALLOC, &cmd);
+ if (ret)
+ return ret;
+ if (veventq_id)
+ *veventq_id = cmd.out_veventq_id;
+ if (veventq_fd)
+ *veventq_fd = cmd.out_veventq_fd;
+ return 0;
+}
+
+#define test_cmd_veventq_alloc(viommu_id, type, veventq_id, veventq_fd) \
+ ASSERT_EQ(0, _test_cmd_veventq_alloc(self->fd, viommu_id, type, \
+ veventq_id, veventq_fd))
+#define test_err_veventq_alloc(_errno, viommu_id, type, veventq_id, \
+ veventq_fd) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_veventq_alloc(self->fd, viommu_id, type, \
+ veventq_id, veventq_fd))
+
+static int _test_cmd_trigger_vevents(int fd, __u32 dev_id, __u32 nvevents)
+{
+ struct iommu_test_cmd trigger_vevent_cmd = {
+ .size = sizeof(trigger_vevent_cmd),
+ .op = IOMMU_TEST_OP_TRIGGER_VEVENT,
+ .trigger_vevent = {
+ .dev_id = dev_id,
+ },
+ };
+ int ret;
+
+ while (nvevents--) {
+ ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT),
+ &trigger_vevent_cmd);
+ if (ret < 0)
+ return -1;
+ }
+ return ret;
+}
+
+#define test_cmd_trigger_vevents(dev_id, nvevents) \
+ ASSERT_EQ(0, _test_cmd_trigger_vevents(self->fd, dev_id, nvevents))
+
+static int _test_cmd_read_vevents(int fd, __u32 event_fd, __u32 nvevents,
+ __u32 virt_id, int *prev_seq)
+{
+ struct pollfd pollfd = { .fd = event_fd, .events = POLLIN };
+ struct iommu_viommu_event_selftest *event;
+ struct iommufd_vevent_header *hdr;
+ ssize_t bytes;
+ void *data;
+ int ret, i;
+
+ ret = poll(&pollfd, 1, 1000);
+ if (ret < 0)
+ return -1;
+
+ data = calloc(nvevents, sizeof(*hdr) + sizeof(*event));
+ if (!data) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ bytes = read(event_fd, data,
+ nvevents * (sizeof(*hdr) + sizeof(*event)));
+ if (bytes <= 0) {
+ errno = EFAULT;
+ ret = -1;
+ goto out_free;
+ }
+
+ for (i = 0; i < nvevents; i++) {
+ hdr = data + i * (sizeof(*hdr) + sizeof(*event));
+
+ if (hdr->flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS ||
+ hdr->sequence - *prev_seq > 1) {
+ *prev_seq = hdr->sequence;
+ errno = EOVERFLOW;
+ ret = -1;
+ goto out_free;
+ }
+ *prev_seq = hdr->sequence;
+ event = data + sizeof(*hdr);
+ if (event->virt_id != virt_id) {
+ errno = EINVAL;
+ ret = -1;
+ goto out_free;
+ }
+ }
+
+ ret = 0;
+out_free:
+ free(data);
+ return ret;
+}
+
+#define test_cmd_read_vevents(event_fd, nvevents, virt_id, prev_seq) \
+ ASSERT_EQ(0, _test_cmd_read_vevents(self->fd, event_fd, nvevents, \
+ virt_id, prev_seq))
+#define test_err_read_vevents(_errno, event_fd, nvevents, virt_id, prev_seq) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_read_vevents(self->fd, event_fd, nvevents, \
+ virt_id, prev_seq))
+
+static int _test_cmd_pasid_attach(int fd, __u32 stdev_id, __u32 pasid,
+ __u32 pt_id)
+{
+ struct iommu_test_cmd test_attach = {
+ .size = sizeof(test_attach),
+ .op = IOMMU_TEST_OP_PASID_ATTACH,
+ .id = stdev_id,
+ .pasid_attach = {
+ .pasid = pasid,
+ .pt_id = pt_id,
+ },
+ };
+
+ return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_ATTACH),
+ &test_attach);
+}
+
+#define test_cmd_pasid_attach(pasid, hwpt_id) \
+ ASSERT_EQ(0, _test_cmd_pasid_attach(self->fd, self->stdev_id, \
+ pasid, hwpt_id))
+
+#define test_err_pasid_attach(_errno, pasid, hwpt_id) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_pasid_attach(self->fd, self->stdev_id, \
+ pasid, hwpt_id))
+
+static int _test_cmd_pasid_replace(int fd, __u32 stdev_id, __u32 pasid,
+ __u32 pt_id)
+{
+ struct iommu_test_cmd test_replace = {
+ .size = sizeof(test_replace),
+ .op = IOMMU_TEST_OP_PASID_REPLACE,
+ .id = stdev_id,
+ .pasid_replace = {
+ .pasid = pasid,
+ .pt_id = pt_id,
+ },
+ };
+
+ return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_REPLACE),
+ &test_replace);
+}
+
+#define test_cmd_pasid_replace(pasid, hwpt_id) \
+ ASSERT_EQ(0, _test_cmd_pasid_replace(self->fd, self->stdev_id, \
+ pasid, hwpt_id))
+
+#define test_err_pasid_replace(_errno, pasid, hwpt_id) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_pasid_replace(self->fd, self->stdev_id, \
+ pasid, hwpt_id))
+
+static int _test_cmd_pasid_detach(int fd, __u32 stdev_id, __u32 pasid)
+{
+ struct iommu_test_cmd test_detach = {
+ .size = sizeof(test_detach),
+ .op = IOMMU_TEST_OP_PASID_DETACH,
+ .id = stdev_id,
+ .pasid_detach = {
+ .pasid = pasid,
+ },
+ };
+
+ return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_DETACH),
+ &test_detach);
+}
+
+#define test_cmd_pasid_detach(pasid) \
+ ASSERT_EQ(0, _test_cmd_pasid_detach(self->fd, self->stdev_id, pasid))
+
+static int test_cmd_pasid_check_hwpt(int fd, __u32 stdev_id, __u32 pasid,
+ __u32 hwpt_id)
+{
+ struct iommu_test_cmd test_pasid_check = {
+ .size = sizeof(test_pasid_check),
+ .op = IOMMU_TEST_OP_PASID_CHECK_HWPT,
+ .id = stdev_id,
+ .pasid_check = {
+ .pasid = pasid,
+ .hwpt_id = hwpt_id,
+ },
+ };
+
+ return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_PASID_CHECK_HWPT),
+ &test_pasid_check);
+}
diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile
index 67fe7a46cb62..e3000ccb9a5d 100644
--- a/tools/testing/selftests/kexec/Makefile
+++ b/tools/testing/selftests/kexec/Makefile
@@ -8,6 +8,13 @@ ifeq ($(ARCH_PROCESSED),$(filter $(ARCH_PROCESSED),x86 ppc64le))
TEST_PROGS := test_kexec_load.sh test_kexec_file_load.sh
TEST_FILES := kexec_common_lib.sh
+include ../../../scripts/Makefile.arch
+
+ifeq ($(IS_64_BIT)$(ARCH_PROCESSED),1x86)
+TEST_PROGS += test_kexec_jump.sh
+test_kexec_jump.sh: $(OUTPUT)/test_kexec_jump
+endif
+
include ../lib.mk
endif
diff --git a/tools/testing/selftests/kexec/test_kexec_jump.c b/tools/testing/selftests/kexec/test_kexec_jump.c
new file mode 100644
index 000000000000..fbce287866f5
--- /dev/null
+++ b/tools/testing/selftests/kexec/test_kexec_jump.c
@@ -0,0 +1,72 @@
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/kexec.h>
+#include <linux/reboot.h>
+#include <sys/reboot.h>
+#include <sys/syscall.h>
+
+asm(
+ " .code64\n"
+ " .data\n"
+ "purgatory_start:\n"
+
+ // Trigger kexec debug exception handling
+ " int3\n"
+
+ // Set load address for next time
+ " leaq purgatory_start_b(%rip), %r11\n"
+ " movq %r11, 8(%rsp)\n"
+
+ // Back to Linux
+ " ret\n"
+
+ // Same again
+ "purgatory_start_b:\n"
+
+ // Trigger kexec debug exception handling
+ " int3\n"
+
+ // Set load address for next time
+ " leaq purgatory_start(%rip), %r11\n"
+ " movq %r11, 8(%rsp)\n"
+
+ // Back to Linux
+ " ret\n"
+
+ "purgatory_end:\n"
+ ".previous"
+);
+extern char purgatory_start[], purgatory_end[];
+
+int main (void)
+{
+ struct kexec_segment segment = {};
+ int ret;
+
+ segment.buf = purgatory_start;
+ segment.bufsz = purgatory_end - purgatory_start;
+ segment.mem = (void *)0x400000;
+ segment.memsz = 0x1000;
+ ret = syscall(__NR_kexec_load, 0x400000, 1, &segment, KEXEC_PRESERVE_CONTEXT);
+ if (ret) {
+ perror("kexec_load");
+ exit(1);
+ }
+
+ ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC);
+ if (ret) {
+ perror("kexec reboot");
+ exit(1);
+ }
+
+ ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC);
+ if (ret) {
+ perror("kexec reboot");
+ exit(1);
+ }
+ printf("Success\n");
+ return 0;
+}
+
diff --git a/tools/testing/selftests/kexec/test_kexec_jump.sh b/tools/testing/selftests/kexec/test_kexec_jump.sh
new file mode 100755
index 000000000000..6ae977054ba2
--- /dev/null
+++ b/tools/testing/selftests/kexec/test_kexec_jump.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Prevent loading a kernel image via the kexec_load syscall when
+# signatures are required. (Dependent on CONFIG_IMA_ARCH_POLICY.)
+
+TEST="$0"
+. ./kexec_common_lib.sh
+
+# kexec requires root privileges
+require_root_privileges
+
+# get the kernel config
+get_kconfig
+
+kconfig_enabled "CONFIG_KEXEC_JUMP=y" "kexec_jump is enabled"
+if [ $? -eq 0 ]; then
+ log_skip "kexec_jump is not enabled"
+fi
+
+kconfig_enabled "CONFIG_IMA_APPRAISE=y" "IMA enabled"
+ima_appraise=$?
+
+kconfig_enabled "CONFIG_IMA_ARCH_POLICY=y" \
+ "IMA architecture specific policy enabled"
+arch_policy=$?
+
+get_secureboot_mode
+secureboot=$?
+
+if [ $secureboot -eq 1 ] && [ $arch_policy -eq 1 ]; then
+ log_skip "Secure boot and CONFIG_IMA_ARCH_POLICY are enabled"
+fi
+
+./test_kexec_jump
+if [ $? -eq 0 ]; then
+ log_pass "kexec_jump succeeded"
+else
+ # The more likely failure mode if anything went wrong is that the
+ # kernel just crashes. But if we get back here, sure, whine anyway.
+ log_fail "kexec_jump failed"
+fi
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index f773f8f99249..f62b0a5aba35 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -50,8 +50,18 @@ LIBKVM_riscv += lib/riscv/ucall.c
# Non-compiled test targets
TEST_PROGS_x86 += x86/nx_huge_pages_test.sh
+# Compiled test targets valid on all architectures with libkvm support
+TEST_GEN_PROGS_COMMON = demand_paging_test
+TEST_GEN_PROGS_COMMON += dirty_log_test
+TEST_GEN_PROGS_COMMON += guest_print_test
+TEST_GEN_PROGS_COMMON += kvm_binary_stats_test
+TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus
+TEST_GEN_PROGS_COMMON += kvm_page_table_test
+TEST_GEN_PROGS_COMMON += set_memory_region_test
+
# Compiled test targets
-TEST_GEN_PROGS_x86 = x86/cpuid_test
+TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_x86 += x86/cpuid_test
TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test
TEST_GEN_PROGS_x86 += x86/feature_msrs_test
@@ -119,27 +129,21 @@ TEST_GEN_PROGS_x86 += x86/triple_fault_event_test
TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test
TEST_GEN_PROGS_x86 += access_tracking_perf_test
TEST_GEN_PROGS_x86 += coalesced_io_test
-TEST_GEN_PROGS_x86 += demand_paging_test
-TEST_GEN_PROGS_x86 += dirty_log_test
TEST_GEN_PROGS_x86 += dirty_log_perf_test
TEST_GEN_PROGS_x86 += guest_memfd_test
-TEST_GEN_PROGS_x86 += guest_print_test
TEST_GEN_PROGS_x86 += hardware_disable_test
-TEST_GEN_PROGS_x86 += kvm_create_max_vcpus
-TEST_GEN_PROGS_x86 += kvm_page_table_test
TEST_GEN_PROGS_x86 += memslot_modification_stress_test
TEST_GEN_PROGS_x86 += memslot_perf_test
TEST_GEN_PROGS_x86 += mmu_stress_test
TEST_GEN_PROGS_x86 += rseq_test
-TEST_GEN_PROGS_x86 += set_memory_region_test
TEST_GEN_PROGS_x86 += steal_time
-TEST_GEN_PROGS_x86 += kvm_binary_stats_test
TEST_GEN_PROGS_x86 += system_counter_offset_test
TEST_GEN_PROGS_x86 += pre_fault_memory_test
# Compiled outputs used by test targets
TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test
+TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs
TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases
TEST_GEN_PROGS_arm64 += arm64/debug-exceptions
@@ -158,22 +162,16 @@ TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3
TEST_GEN_PROGS_arm64 += access_tracking_perf_test
TEST_GEN_PROGS_arm64 += arch_timer
TEST_GEN_PROGS_arm64 += coalesced_io_test
-TEST_GEN_PROGS_arm64 += demand_paging_test
-TEST_GEN_PROGS_arm64 += dirty_log_test
TEST_GEN_PROGS_arm64 += dirty_log_perf_test
-TEST_GEN_PROGS_arm64 += guest_print_test
TEST_GEN_PROGS_arm64 += get-reg-list
-TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus
-TEST_GEN_PROGS_arm64 += kvm_page_table_test
TEST_GEN_PROGS_arm64 += memslot_modification_stress_test
TEST_GEN_PROGS_arm64 += memslot_perf_test
TEST_GEN_PROGS_arm64 += mmu_stress_test
TEST_GEN_PROGS_arm64 += rseq_test
-TEST_GEN_PROGS_arm64 += set_memory_region_test
TEST_GEN_PROGS_arm64 += steal_time
-TEST_GEN_PROGS_arm64 += kvm_binary_stats_test
-TEST_GEN_PROGS_s390 = s390/memop
+TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON)
+TEST_GEN_PROGS_s390 += s390/memop
TEST_GEN_PROGS_s390 += s390/resets
TEST_GEN_PROGS_s390 += s390/sync_regs_test
TEST_GEN_PROGS_s390 += s390/tprot
@@ -182,27 +180,14 @@ TEST_GEN_PROGS_s390 += s390/debug_test
TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test
TEST_GEN_PROGS_s390 += s390/shared_zeropage_test
TEST_GEN_PROGS_s390 += s390/ucontrol_test
-TEST_GEN_PROGS_s390 += demand_paging_test
-TEST_GEN_PROGS_s390 += dirty_log_test
-TEST_GEN_PROGS_s390 += guest_print_test
-TEST_GEN_PROGS_s390 += kvm_create_max_vcpus
-TEST_GEN_PROGS_s390 += kvm_page_table_test
TEST_GEN_PROGS_s390 += rseq_test
-TEST_GEN_PROGS_s390 += set_memory_region_test
-TEST_GEN_PROGS_s390 += kvm_binary_stats_test
+TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON)
TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
TEST_GEN_PROGS_riscv += riscv/ebreak_test
TEST_GEN_PROGS_riscv += arch_timer
TEST_GEN_PROGS_riscv += coalesced_io_test
-TEST_GEN_PROGS_riscv += demand_paging_test
-TEST_GEN_PROGS_riscv += dirty_log_test
TEST_GEN_PROGS_riscv += get-reg-list
-TEST_GEN_PROGS_riscv += guest_print_test
-TEST_GEN_PROGS_riscv += kvm_binary_stats_test
-TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
-TEST_GEN_PROGS_riscv += kvm_page_table_test
-TEST_GEN_PROGS_riscv += set_memory_region_test
TEST_GEN_PROGS_riscv += steal_time
SPLIT_TESTS += arch_timer
diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c
index ec33a8f9c908..dc6559dad9d8 100644
--- a/tools/testing/selftests/kvm/arm64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c
@@ -199,7 +199,7 @@ static bool guest_set_ha(void)
if (hadbs == 0)
return false;
- tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
+ tcr = read_sysreg(tcr_el1) | TCR_HA;
write_sysreg(tcr, tcr_el1);
isb();
diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c
index 322b9d3b0125..57708de2075d 100644
--- a/tools/testing/selftests/kvm/arm64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c
@@ -129,10 +129,10 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 1),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 1),
REG_FTR_END,
};
diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index 1e8d0d531fbd..b0fc0f945766 100644
--- a/tools/testing/selftests/kvm/include/arm64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
@@ -62,6 +62,67 @@
MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
+/* TCR_EL1 specific flags */
+#define TCR_T0SZ_OFFSET 0
+#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET)
+
+#define TCR_IRGN0_SHIFT 8
+#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT)
+#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT)
+
+#define TCR_ORGN0_SHIFT 10
+#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT)
+#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT)
+
+#define TCR_SH0_SHIFT 12
+#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT)
+#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT)
+
+#define TCR_TG0_SHIFT 14
+#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT)
+#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT)
+#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT)
+#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT)
+
+#define TCR_IPS_SHIFT 32
+#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT)
+#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT)
+#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT)
+#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT)
+#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT)
+
+#define TCR_HA (UL(1) << 39)
+#define TCR_DS (UL(1) << 59)
+
+/*
+ * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
+ */
+#define PTE_ATTRINDX(t) ((t) << 2)
+#define PTE_ATTRINDX_MASK GENMASK(4, 2)
+#define PTE_ATTRINDX_SHIFT 2
+
+#define PTE_VALID BIT(0)
+#define PGD_TYPE_TABLE BIT(1)
+#define PUD_TYPE_TABLE BIT(1)
+#define PMD_TYPE_TABLE BIT(1)
+#define PTE_TYPE_PAGE BIT(1)
+
+#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */
+#define PTE_AF BIT(10)
+
+#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift))
+#define PTE_ADDR_51_48 GENMASK(15, 12)
+#define PTE_ADDR_51_48_SHIFT 12
+#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift))
+#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
+#define PTE_ADDR_51_50_LPA2_SHIFT 8
+
void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
struct kvm_vcpu_init *init, void *guest_code);
@@ -102,12 +163,6 @@ enum {
(v) == VECTOR_SYNC_LOWER_64 || \
(v) == VECTOR_SYNC_LOWER_32)
-/* Access flag */
-#define PTE_AF (1ULL << 10)
-
-/* Access flag update enable/disable */
-#define TCR_EL1_HA (1ULL << 39)
-
void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
uint32_t *ipa16k, uint32_t *ipa64k);
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index 7ba3aa3755f3..9d69904cb608 100644
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -72,13 +72,13 @@ static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
uint64_t pte;
if (use_lpa2_pte_format(vm)) {
- pte = pa & GENMASK(49, vm->page_shift);
- pte |= FIELD_GET(GENMASK(51, 50), pa) << 8;
- attrs &= ~GENMASK(9, 8);
+ pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift);
+ pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT;
+ attrs &= ~PTE_ADDR_51_50_LPA2;
} else {
- pte = pa & GENMASK(47, vm->page_shift);
+ pte = pa & PTE_ADDR_MASK(vm->page_shift);
if (vm->page_shift == 16)
- pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
+ pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT;
}
pte |= attrs;
@@ -90,12 +90,12 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
uint64_t pa;
if (use_lpa2_pte_format(vm)) {
- pa = pte & GENMASK(49, vm->page_shift);
- pa |= FIELD_GET(GENMASK(9, 8), pte) << 50;
+ pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift);
+ pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50;
} else {
- pa = pte & GENMASK(47, vm->page_shift);
+ pa = pte & PTE_ADDR_MASK(vm->page_shift);
if (vm->page_shift == 16)
- pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
+ pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48;
}
return pa;
@@ -128,7 +128,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
uint64_t flags)
{
- uint8_t attr_idx = flags & 7;
+ uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT);
+ uint64_t pg_attr;
uint64_t *ptep;
TEST_ASSERT((vaddr % vm->page_size) == 0,
@@ -147,18 +148,21 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PGD_TYPE_TABLE | PTE_VALID);
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PUD_TYPE_TABLE | PTE_VALID);
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
if (!*ptep)
- *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm),
+ PMD_TYPE_TABLE | PTE_VALID);
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
@@ -167,7 +171,11 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
TEST_FAIL("Page table levels must be 2, 3, or 4");
}
- *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */
+ pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID;
+ if (!use_lpa2_pte_format(vm))
+ pg_attr |= PTE_SHARED;
+
+ *ptep = addr_pte(vm, paddr, pg_attr);
}
void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -293,20 +301,20 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
case VM_MODE_P48V48_64K:
case VM_MODE_P40V48_64K:
case VM_MODE_P36V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
+ tcr_el1 |= TCR_TG0_64K;
break;
case VM_MODE_P52V48_16K:
case VM_MODE_P48V48_16K:
case VM_MODE_P40V48_16K:
case VM_MODE_P36V48_16K:
case VM_MODE_P36V47_16K:
- tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
+ tcr_el1 |= TCR_TG0_16K;
break;
case VM_MODE_P52V48_4K:
case VM_MODE_P48V48_4K:
case VM_MODE_P40V48_4K:
case VM_MODE_P36V48_4K:
- tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
+ tcr_el1 |= TCR_TG0_4K;
break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
@@ -319,35 +327,35 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
case VM_MODE_P52V48_4K:
case VM_MODE_P52V48_16K:
case VM_MODE_P52V48_64K:
- tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ tcr_el1 |= TCR_IPS_52_BITS;
ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
break;
case VM_MODE_P48V48_4K:
case VM_MODE_P48V48_16K:
case VM_MODE_P48V48_64K:
- tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
+ tcr_el1 |= TCR_IPS_48_BITS;
break;
case VM_MODE_P40V48_4K:
case VM_MODE_P40V48_16K:
case VM_MODE_P40V48_64K:
- tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
+ tcr_el1 |= TCR_IPS_40_BITS;
break;
case VM_MODE_P36V48_4K:
case VM_MODE_P36V48_16K:
case VM_MODE_P36V48_64K:
case VM_MODE_P36V47_16K:
- tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
+ tcr_el1 |= TCR_IPS_36_BITS;
break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
- sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
- /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
- tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
- tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
+ sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I;
+
+ tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER;
+ tcr_el1 |= TCR_T0SZ(vm->va_bits);
if (use_lpa2_pte_format(vm))
- tcr_el1 |= (1ul << 59) /* DS */;
+ tcr_el1 |= TCR_DS;
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 279ad8946040..815bc45dd8dc 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -2019,9 +2019,8 @@ static struct exit_reason {
KVM_EXIT_STRING(RISCV_SBI),
KVM_EXIT_STRING(RISCV_CSR),
KVM_EXIT_STRING(NOTIFY),
-#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
- KVM_EXIT_STRING(MEMORY_NOT_PRESENT),
-#endif
+ KVM_EXIT_STRING(LOONGARCH_IOCSR),
+ KVM_EXIT_STRING(MEMORY_FAULT),
};
/*
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index 8515921dfdbf..569f2d67c9b8 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -53,8 +53,10 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVVPTC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAAMO:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZABHA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZALRSC:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB:
@@ -434,8 +436,10 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(SVNAPOT),
KVM_ISA_EXT_ARR(SVPBMT),
KVM_ISA_EXT_ARR(SVVPTC),
+ KVM_ISA_EXT_ARR(ZAAMO),
KVM_ISA_EXT_ARR(ZABHA),
KVM_ISA_EXT_ARR(ZACAS),
+ KVM_ISA_EXT_ARR(ZALRSC),
KVM_ISA_EXT_ARR(ZAWRS),
KVM_ISA_EXT_ARR(ZBA),
KVM_ISA_EXT_ARR(ZBB),
@@ -974,8 +978,10 @@ KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT);
KVM_ISA_EXT_SIMPLE_CONFIG(svvptc, SVVPTC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zaamo, ZAAMO);
KVM_ISA_EXT_SIMPLE_CONFIG(zabha, ZABHA);
KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS);
+KVM_ISA_EXT_SIMPLE_CONFIG(zalrsc, ZALRSC);
KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS);
KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA);
KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB);
@@ -1045,8 +1051,10 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_svnapot,
&config_svpbmt,
&config_svvptc,
+ &config_zaamo,
&config_zabha,
&config_zacas,
+ &config_zalrsc,
&config_zawrs,
&config_zba,
&config_zbb,
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index e5898678bfab..1375fca80bcd 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -196,25 +196,27 @@ static void calc_min_max_cpu(void)
static void help(const char *name)
{
puts("");
- printf("usage: %s [-h] [-u]\n", name);
+ printf("usage: %s [-h] [-u] [-l latency]\n", name);
printf(" -u: Don't sanity check the number of successful KVM_RUNs\n");
+ printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n");
puts("");
exit(0);
}
int main(int argc, char *argv[])
{
+ int r, i, snapshot, opt, fd = -1, latency = -1;
bool skip_sanity_check = false;
- int r, i, snapshot;
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
u32 cpu, rseq_cpu;
- int opt;
- while ((opt = getopt(argc, argv, "hu")) != -1) {
+ while ((opt = getopt(argc, argv, "hl:u")) != -1) {
switch (opt) {
case 'u':
skip_sanity_check = true;
+ case 'l':
+ latency = atoi_paranoid(optarg);
break;
case 'h':
default:
@@ -243,6 +245,20 @@ int main(int argc, char *argv[])
pthread_create(&migration_thread, NULL, migration_worker,
(void *)(unsigned long)syscall(SYS_gettid));
+ if (latency >= 0) {
+ /*
+ * Writes to cpu_dma_latency persist only while the file is
+ * open, i.e. it allows userspace to provide guaranteed latency
+ * while running a workload. Keep the file open until the test
+ * completes, otherwise writing cpu_dma_latency is meaningless.
+ */
+ fd = open("/dev/cpu_dma_latency", O_RDWR);
+ TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd));
+
+ r = write(fd, &latency, 4);
+ TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency");
+ }
+
for (i = 0; !done; i++) {
vcpu_run(vcpu);
TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
@@ -278,6 +294,9 @@ int main(int argc, char *argv[])
"rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu);
}
+ if (fd > 0)
+ close(fd);
+
/*
* Sanity check that the test was able to enter the guest a reasonable
* number of times, e.g. didn't get stalled too often/long waiting for
@@ -293,8 +312,8 @@ int main(int argc, char *argv[])
TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2),
"Only performed %d KVM_RUNs, task stalled too much?\n\n"
" Try disabling deep sleep states to reduce CPU wakeup latency,\n"
- " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n"
- " or run with -u to disable this sanity check.", i);
+ " e.g. via cpuidle.off=1 or via -l <latency>, or run with -u to\n"
+ " disable this sanity check.", i);
pthread_join(migration_thread, NULL);
diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
index 2b550eff35f1..390ae2d87493 100644
--- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c
@@ -7,6 +7,7 @@
#include "kvm_util.h"
#include "processor.h"
+#include "kselftest.h"
#define CPUID_MWAIT (1u << 3)
@@ -14,6 +15,8 @@ enum monitor_mwait_testcases {
MWAIT_QUIRK_DISABLED = BIT(0),
MISC_ENABLES_QUIRK_DISABLED = BIT(1),
MWAIT_DISABLED = BIT(2),
+ CPUID_DISABLED = BIT(3),
+ TEST_MAX = CPUID_DISABLED * 2 - 1,
};
/*
@@ -35,11 +38,19 @@ do { \
testcase, vector); \
} while (0)
-static void guest_monitor_wait(int testcase)
+static void guest_monitor_wait(void *arg)
{
+ int testcase = (int) (long) arg;
u8 vector;
- GUEST_SYNC(testcase);
+ u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT;
+ if (!(testcase & MWAIT_DISABLED))
+ val |= MSR_IA32_MISC_ENABLE_MWAIT;
+ wrmsr(MSR_IA32_MISC_ENABLE, val);
+
+ __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED),
+ "Expected CPUID.MWAIT %s\n",
+ (testcase & MWAIT_DISABLED) ? "cleared" : "set");
/*
* Arbitrarily MONITOR this function, SVM performs fault checks before
@@ -50,19 +61,6 @@ static void guest_monitor_wait(int testcase)
vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
-}
-
-static void guest_code(void)
-{
- guest_monitor_wait(MWAIT_DISABLED);
-
- guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
-
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
- guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
GUEST_DONE();
}
@@ -74,56 +72,64 @@ int main(int argc, char *argv[])
struct kvm_vm *vm;
struct ucall uc;
int testcase;
+ char test[80];
- TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ ksft_print_header();
+ ksft_set_plan(12);
+ for (testcase = 0; testcase <= TEST_MAX; testcase++) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait);
+ vcpu_args_set(vcpu, 1, (void *)(long)testcase);
+
+ disabled_quirks = 0;
+ if (testcase & MWAIT_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+ strcpy(test, "MWAIT can fault");
+ } else {
+ strcpy(test, "MWAIT never faults");
+ }
+ if (testcase & MISC_ENABLES_QUIRK_DISABLED) {
+ disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+ strcat(test, ", MISC_ENABLE updates CPUID");
+ } else {
+ strcat(test, ", no CPUID updates");
+ }
+
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+ if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) &&
+ (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED)))
+ continue;
+
+ if (testcase & CPUID_DISABLED) {
+ strcat(test, ", CPUID clear");
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ } else {
+ strcat(test, ", CPUID set");
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+ }
+
+ if (testcase & MWAIT_DISABLED)
+ strcat(test, ", MWAIT disabled");
- while (1) {
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
switch (get_ucall(vcpu, &uc)) {
- case UCALL_SYNC:
- testcase = uc.args[1];
- break;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT(uc);
- goto done;
+ /* Detected in vcpu_run */
+ break;
case UCALL_DONE:
- goto done;
+ ksft_test_result_pass("%s\n", test);
+ break;
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
- goto done;
- }
-
- disabled_quirks = 0;
- if (testcase & MWAIT_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
- if (testcase & MISC_ENABLES_QUIRK_DISABLED)
- disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
- vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
-
- /*
- * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
- * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
- * bit in MISC_ENABLES accordingly. If the quirk is enabled,
- * the only valid configuration is MWAIT disabled, as CPUID
- * can't be manually changed after running the vCPU.
- */
- if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
- TEST_ASSERT(testcase & MWAIT_DISABLED,
- "Can't toggle CPUID features after running vCPU");
- continue;
+ break;
}
-
- vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
- (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
+ kvm_vm_free(vm);
}
+ ksft_finished();
-done:
- kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h
index b9054086a0c9..18a6014920b5 100644
--- a/tools/testing/selftests/landlock/audit.h
+++ b/tools/testing/selftests/landlock/audit.h
@@ -300,15 +300,22 @@ out:
return err;
}
-static int __maybe_unused matches_log_domain_allocated(int audit_fd,
+static int __maybe_unused matches_log_domain_allocated(int audit_fd, pid_t pid,
__u64 *domain_id)
{
- return audit_match_record(
- audit_fd, AUDIT_LANDLOCK_DOMAIN,
- REGEX_LANDLOCK_PREFIX
- " status=allocated mode=enforcing pid=[0-9]\\+ uid=[0-9]\\+"
- " exe=\"[^\"]\\+\" comm=\".*_test\"$",
- domain_id);
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " status=allocated mode=enforcing pid=%d uid=[0-9]\\+"
+ " exe=\"[^\"]\\+\" comm=\".*_test\"$";
+ char log_match[sizeof(log_template) + 10];
+ int log_match_len;
+
+ log_match_len =
+ snprintf(log_match, sizeof(log_match), log_template, pid);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match,
+ domain_id);
}
static int __maybe_unused matches_log_domain_deallocated(
diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
index a0643070c403..cfc571afd0eb 100644
--- a/tools/testing/selftests/landlock/audit_test.c
+++ b/tools/testing/selftests/landlock/audit_test.c
@@ -9,6 +9,7 @@
#include <errno.h>
#include <limits.h>
#include <linux/landlock.h>
+#include <pthread.h>
#include <stdlib.h>
#include <sys/mount.h>
#include <sys/prctl.h>
@@ -40,7 +41,6 @@ FIXTURE(audit)
{
struct audit_filter audit_filter;
int audit_fd;
- __u64(*domain_stack)[16];
};
FIXTURE_SETUP(audit)
@@ -60,18 +60,10 @@ FIXTURE_SETUP(audit)
TH_LOG("Failed to initialize audit: %s", error_msg);
}
clear_cap(_metadata, CAP_AUDIT_CONTROL);
-
- self->domain_stack = mmap(NULL, sizeof(*self->domain_stack),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS, -1, 0);
- ASSERT_NE(MAP_FAILED, self->domain_stack);
- memset(self->domain_stack, 0, sizeof(*self->domain_stack));
}
FIXTURE_TEARDOWN(audit)
{
- EXPECT_EQ(0, munmap(self->domain_stack, sizeof(*self->domain_stack)));
-
set_cap(_metadata, CAP_AUDIT_CONTROL);
EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter));
clear_cap(_metadata, CAP_AUDIT_CONTROL);
@@ -83,9 +75,15 @@ TEST_F(audit, layers)
.scoped = LANDLOCK_SCOPE_SIGNAL,
};
int status, ruleset_fd, i;
+ __u64(*domain_stack)[16];
__u64 prev_dom = 3;
pid_t child;
+ domain_stack = mmap(NULL, sizeof(*domain_stack), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, domain_stack);
+ memset(domain_stack, 0, sizeof(*domain_stack));
+
ruleset_fd =
landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
@@ -94,7 +92,7 @@ TEST_F(audit, layers)
child = fork();
ASSERT_LE(0, child);
if (child == 0) {
- for (i = 0; i < ARRAY_SIZE(*self->domain_stack); i++) {
+ for (i = 0; i < ARRAY_SIZE(*domain_stack); i++) {
__u64 denial_dom = 1;
__u64 allocated_dom = 2;
@@ -107,7 +105,8 @@ TEST_F(audit, layers)
matches_log_signal(_metadata, self->audit_fd,
getppid(), &denial_dom));
EXPECT_EQ(0, matches_log_domain_allocated(
- self->audit_fd, &allocated_dom));
+ self->audit_fd, getpid(),
+ &allocated_dom));
EXPECT_NE(denial_dom, 1);
EXPECT_NE(denial_dom, 0);
EXPECT_EQ(denial_dom, allocated_dom);
@@ -115,7 +114,7 @@ TEST_F(audit, layers)
/* Checks that the new domain is younger than the previous one. */
EXPECT_GT(allocated_dom, prev_dom);
prev_dom = allocated_dom;
- (*self->domain_stack)[i] = allocated_dom;
+ (*domain_stack)[i] = allocated_dom;
}
/* Checks that we reached the maximum number of layers. */
@@ -142,23 +141,143 @@ TEST_F(audit, layers)
/* Purges log from deallocated domains. */
EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
&audit_tv_dom_drop, sizeof(audit_tv_dom_drop)));
- for (i = ARRAY_SIZE(*self->domain_stack) - 1; i >= 0; i--) {
+ for (i = ARRAY_SIZE(*domain_stack) - 1; i >= 0; i--) {
__u64 deallocated_dom = 2;
EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1,
&deallocated_dom));
- EXPECT_EQ((*self->domain_stack)[i], deallocated_dom)
+ EXPECT_EQ((*domain_stack)[i], deallocated_dom)
{
TH_LOG("Failed to match domain %llx (#%d)",
- (*self->domain_stack)[i], i);
+ (*domain_stack)[i], i);
}
}
+ EXPECT_EQ(0, munmap(domain_stack, sizeof(*domain_stack)));
EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
&audit_tv_default, sizeof(audit_tv_default)));
-
EXPECT_EQ(0, close(ruleset_fd));
}
+struct thread_data {
+ pid_t parent_pid;
+ int ruleset_fd, pipe_child, pipe_parent;
+};
+
+static void *thread_audit_test(void *arg)
+{
+ const struct thread_data *data = (struct thread_data *)arg;
+ uintptr_t err = 0;
+ char buffer;
+
+ /* TGID and TID are different for a second thread. */
+ if (getpid() == gettid()) {
+ err = 1;
+ goto out;
+ }
+
+ if (landlock_restrict_self(data->ruleset_fd, 0)) {
+ err = 2;
+ goto out;
+ }
+
+ if (close(data->ruleset_fd)) {
+ err = 3;
+ goto out;
+ }
+
+ /* Creates a denial to get the domain ID. */
+ if (kill(data->parent_pid, 0) != -1) {
+ err = 4;
+ goto out;
+ }
+
+ if (EPERM != errno) {
+ err = 5;
+ goto out;
+ }
+
+ /* Signals the parent to read denial logs. */
+ if (write(data->pipe_child, ".", 1) != 1) {
+ err = 6;
+ goto out;
+ }
+
+ /* Waits for the parent to update audit filters. */
+ if (read(data->pipe_parent, &buffer, 1) != 1) {
+ err = 7;
+ goto out;
+ }
+
+out:
+ close(data->pipe_child);
+ close(data->pipe_parent);
+ return (void *)err;
+}
+
+/* Checks that the PID tied to a domain is not a TID but the TGID. */
+TEST_F(audit, thread)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ __u64 denial_dom = 1;
+ __u64 allocated_dom = 2;
+ __u64 deallocated_dom = 3;
+ pthread_t thread;
+ int pipe_child[2], pipe_parent[2];
+ char buffer;
+ struct thread_data child_data;
+
+ child_data.parent_pid = getppid();
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ child_data.pipe_child = pipe_child[1];
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ child_data.pipe_parent = pipe_parent[0];
+ child_data.ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, child_data.ruleset_fd);
+
+ /* TGID and TID are the same for the initial thread . */
+ EXPECT_EQ(getpid(), gettid());
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, pthread_create(&thread, NULL, thread_audit_test,
+ &child_data));
+
+ /* Waits for the child to generate a denial. */
+ ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
+ EXPECT_EQ(0, close(pipe_child[0]));
+
+ /* Matches the signal log to get the domain ID. */
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ child_data.parent_pid, &denial_dom));
+ EXPECT_NE(denial_dom, 1);
+ EXPECT_NE(denial_dom, 0);
+
+ EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, getpid(),
+ &allocated_dom));
+ EXPECT_EQ(denial_dom, allocated_dom);
+
+ /* Updates filter rules to match the drop record. */
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE));
+ EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_DEL_RULE));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ /* Signals the thread to exit, which will generate a domain deallocation. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ ASSERT_EQ(0, pthread_join(thread, NULL));
+
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_dom_drop, sizeof(audit_tv_dom_drop)));
+ EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1,
+ &deallocated_dom));
+ EXPECT_EQ(denial_dom, deallocated_dom);
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_default, sizeof(audit_tv_default)));
+}
+
FIXTURE(audit_flags)
{
struct audit_filter audit_filter;
@@ -273,7 +392,8 @@ TEST_F(audit_flags, signal)
/* Checks domain information records. */
EXPECT_EQ(0, matches_log_domain_allocated(
- self->audit_fd, &allocated_dom));
+ self->audit_fd, getpid(),
+ &allocated_dom));
EXPECT_NE(*self->domain_id, 1);
EXPECT_NE(*self->domain_id, 0);
EXPECT_EQ(*self->domain_id, allocated_dom);
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index f819011a8798..73729382d40f 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -5964,7 +5964,8 @@ TEST_F(audit_layout1, refer_handled)
EXPECT_EQ(EXDEV, errno);
EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
dir_s1d1));
- EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, NULL));
+ EXPECT_EQ(0,
+ matches_log_domain_allocated(self->audit_fd, getpid(), NULL));
EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
dir_s1d3));
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index 81a1f64a22e8..377b3699ff31 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -1,2 +1,3 @@
CONFIG_TEST_BITMAP=m
+CONFIG_PRIME_NUMBERS=m
CONFIG_TEST_BITOPS=m
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
index e949a43a6145..17ed3e9917ca 100644
--- a/tools/testing/selftests/mincore/mincore_selftest.c
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -261,9 +261,6 @@ TEST(check_file_mmap)
TH_LOG("No read-ahead pages found in memory");
}
- EXPECT_LT(i, vec_size) {
- TH_LOG("Read-ahead pages reached the end of the file");
- }
/*
* End of the readahead window. The rest of the pages shouldn't
* be in memory.
@@ -286,8 +283,7 @@ out_free:
/*
* Test mincore() behavior on a page backed by a tmpfs file. This test
- * performs the same steps as the previous one. However, we don't expect
- * any readahead in this case.
+ * performs the same steps as the previous one.
*/
TEST(check_tmpfs_mmap)
{
@@ -298,7 +294,6 @@ TEST(check_tmpfs_mmap)
int page_size;
int fd;
int i;
- int ra_pages = 0;
page_size = sysconf(_SC_PAGESIZE);
vec_size = FILE_SIZE / page_size;
@@ -341,8 +336,7 @@ TEST(check_tmpfs_mmap)
}
/*
- * Touch a page in the middle of the mapping. We expect only
- * that page to be fetched into memory.
+ * Touch a page in the middle of the mapping.
*/
addr[FILE_SIZE / 2] = 1;
retval = mincore(addr, FILE_SIZE, vec);
@@ -351,15 +345,6 @@ TEST(check_tmpfs_mmap)
TH_LOG("Page not found in memory after use");
}
- i = FILE_SIZE / 2 / page_size + 1;
- while (i < vec_size && vec[i]) {
- ra_pages++;
- i++;
- }
- ASSERT_EQ(ra_pages, 0) {
- TH_LOG("Read-ahead pages found in memory");
- }
-
munmap(addr, FILE_SIZE);
close(fd);
free(vec);
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index 121000c28c10..c5241b193db8 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -57,4 +57,4 @@ droppable
hugetlb_dio
pkey_sighandler_tests_32
pkey_sighandler_tests_64
-guard-pages
+guard-regions
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 63ce39d024bb..8270895039d1 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -97,7 +97,7 @@ TEST_GEN_FILES += hugetlb_fault_after_madv
TEST_GEN_FILES += hugetlb_madv_vs_map
TEST_GEN_FILES += hugetlb_dio
TEST_GEN_FILES += droppable
-TEST_GEN_FILES += guard-pages
+TEST_GEN_FILES += guard-regions
ifneq ($(ARCH),arm64)
TEST_GEN_FILES += soft-dirty
diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
index 67df7b47087f..e1fe16bcbbe8 100755
--- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
@@ -29,7 +29,7 @@ fi
if [[ $cgroup2 ]]; then
cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}')
if [[ -z "$cgroup_path" ]]; then
- cgroup_path=/dev/cgroup/memory
+ cgroup_path=$(mktemp -d)
mount -t cgroup2 none $cgroup_path
do_umount=1
fi
@@ -37,7 +37,7 @@ if [[ $cgroup2 ]]; then
else
cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
if [[ -z "$cgroup_path" ]]; then
- cgroup_path=/dev/cgroup/memory
+ cgroup_path=$(mktemp -d)
mount -t cgroup memory,hugetlb $cgroup_path
do_umount=1
fi
diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c
index 2c3a0eb6b22d..9bc4591c7b16 100644
--- a/tools/testing/selftests/mm/compaction_test.c
+++ b/tools/testing/selftests/mm/compaction_test.c
@@ -90,6 +90,8 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
int compaction_index = 0;
char nr_hugepages[20] = {0};
char init_nr_hugepages[24] = {0};
+ char target_nr_hugepages[24] = {0};
+ int slen;
snprintf(init_nr_hugepages, sizeof(init_nr_hugepages),
"%lu", initial_nr_hugepages);
@@ -106,11 +108,18 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
goto out;
}
- /* Request a large number of huge pages. The Kernel will allocate
- as much as it can */
- if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
- ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n",
- strerror(errno));
+ /*
+ * Request huge pages for about half of the free memory. The Kernel
+ * will allocate as much as it can, and we expect it will get at least 1/3
+ */
+ nr_hugepages_ul = mem_free / hugepage_size / 2;
+ snprintf(target_nr_hugepages, sizeof(target_nr_hugepages),
+ "%lu", nr_hugepages_ul);
+
+ slen = strlen(target_nr_hugepages);
+ if (write(fd, target_nr_hugepages, slen) != slen) {
+ ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n",
+ nr_hugepages_ul, strerror(errno));
goto close_fd;
}
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index 9446673645eb..b6cfe0a4b7df 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -293,7 +293,7 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
.iov_base = mem,
.iov_len = size,
};
- ssize_t cur, total, transferred;
+ ssize_t cur, total, transferred = 0;
struct comm_pipes comm_pipes;
char *old, *new;
int ret, fds[2];
@@ -876,7 +876,7 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
mremap_size = thpsize / 2;
mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (mem == MAP_FAILED) {
+ if (mremap_mem == MAP_FAILED) {
ksft_test_result_fail("mmap() failed\n");
goto munmap;
}
diff --git a/tools/testing/selftests/mm/guard-pages.c b/tools/testing/selftests/mm/guard-regions.c
index 525c50d3ec23..eba43ead13ae 100644
--- a/tools/testing/selftests/mm/guard-pages.c
+++ b/tools/testing/selftests/mm/guard-regions.c
@@ -6,6 +6,7 @@
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
+#include <linux/limits.h>
#include <linux/userfaultfd.h>
#include <setjmp.h>
#include <signal.h>
@@ -18,6 +19,7 @@
#include <sys/syscall.h>
#include <sys/uio.h>
#include <unistd.h>
+#include "vm_util.h"
#include "../pidfd/pidfd.h"
@@ -39,6 +41,79 @@ static sigjmp_buf signal_jmp_buf;
*/
#define FORCE_READ(x) (*(volatile typeof(x) *)x)
+/*
+ * How is the test backing the mapping being tested?
+ */
+enum backing_type {
+ ANON_BACKED,
+ SHMEM_BACKED,
+ LOCAL_FILE_BACKED,
+};
+
+FIXTURE(guard_regions)
+{
+ unsigned long page_size;
+ char path[PATH_MAX];
+ int fd;
+};
+
+FIXTURE_VARIANT(guard_regions)
+{
+ enum backing_type backing;
+};
+
+FIXTURE_VARIANT_ADD(guard_regions, anon)
+{
+ .backing = ANON_BACKED,
+};
+
+FIXTURE_VARIANT_ADD(guard_regions, shmem)
+{
+ .backing = SHMEM_BACKED,
+};
+
+FIXTURE_VARIANT_ADD(guard_regions, file)
+{
+ .backing = LOCAL_FILE_BACKED,
+};
+
+static bool is_anon_backed(const FIXTURE_VARIANT(guard_regions) * variant)
+{
+ switch (variant->backing) {
+ case ANON_BACKED:
+ case SHMEM_BACKED:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void *mmap_(FIXTURE_DATA(guard_regions) * self,
+ const FIXTURE_VARIANT(guard_regions) * variant,
+ void *addr, size_t length, int prot, int extra_flags,
+ off_t offset)
+{
+ int fd;
+ int flags = extra_flags;
+
+ switch (variant->backing) {
+ case ANON_BACKED:
+ flags |= MAP_PRIVATE | MAP_ANON;
+ fd = -1;
+ break;
+ case SHMEM_BACKED:
+ case LOCAL_FILE_BACKED:
+ flags |= MAP_SHARED;
+ fd = self->fd;
+ break;
+ default:
+ ksft_exit_fail();
+ break;
+ }
+
+ return mmap(addr, length, prot, flags, fd, offset);
+}
+
static int userfaultfd(int flags)
{
return syscall(SYS_userfaultfd, flags);
@@ -104,12 +179,7 @@ static bool try_read_write_buf(char *ptr)
return try_read_buf(ptr) && try_write_buf(ptr);
}
-FIXTURE(guard_pages)
-{
- unsigned long page_size;
-};
-
-FIXTURE_SETUP(guard_pages)
+static void setup_sighandler(void)
{
struct sigaction act = {
.sa_handler = &handle_fatal,
@@ -119,11 +189,9 @@ FIXTURE_SETUP(guard_pages)
sigemptyset(&act.sa_mask);
if (sigaction(SIGSEGV, &act, NULL))
ksft_exit_fail_perror("sigaction");
+}
- self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
-};
-
-FIXTURE_TEARDOWN(guard_pages)
+static void teardown_sighandler(void)
{
struct sigaction act = {
.sa_handler = SIG_DFL,
@@ -134,15 +202,113 @@ FIXTURE_TEARDOWN(guard_pages)
sigaction(SIGSEGV, &act, NULL);
}
-TEST_F(guard_pages, basic)
+static int open_file(const char *prefix, char *path)
+{
+ int fd;
+
+ snprintf(path, PATH_MAX, "%sguard_regions_test_file_XXXXXX", prefix);
+ fd = mkstemp(path);
+ if (fd < 0)
+ ksft_exit_fail_perror("mkstemp");
+
+ return fd;
+}
+
+/* Establish a varying pattern in a buffer. */
+static void set_pattern(char *ptr, size_t num_pages, size_t page_size)
+{
+ size_t i;
+
+ for (i = 0; i < num_pages; i++) {
+ char *ptr2 = &ptr[i * page_size];
+
+ memset(ptr2, 'a' + (i % 26), page_size);
+ }
+}
+
+/*
+ * Check that a buffer contains the pattern set by set_pattern(), starting at a
+ * page offset of pgoff within the buffer.
+ */
+static bool check_pattern_offset(char *ptr, size_t num_pages, size_t page_size,
+ size_t pgoff)
+{
+ size_t i;
+
+ for (i = 0; i < num_pages * page_size; i++) {
+ size_t offset = pgoff * page_size + i;
+ char actual = ptr[offset];
+ char expected = 'a' + ((offset / page_size) % 26);
+
+ if (actual != expected)
+ return false;
+ }
+
+ return true;
+}
+
+/* Check that a buffer contains the pattern set by set_pattern(). */
+static bool check_pattern(char *ptr, size_t num_pages, size_t page_size)
+{
+ return check_pattern_offset(ptr, num_pages, page_size, 0);
+}
+
+/* Determine if a buffer contains only repetitions of a specified char. */
+static bool is_buf_eq(char *buf, size_t size, char chr)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ if (buf[i] != chr)
+ return false;
+ }
+
+ return true;
+}
+
+FIXTURE_SETUP(guard_regions)
+{
+ self->page_size = (unsigned long)sysconf(_SC_PAGESIZE);
+ setup_sighandler();
+
+ switch (variant->backing) {
+ case ANON_BACKED:
+ return;
+ case LOCAL_FILE_BACKED:
+ self->fd = open_file("", self->path);
+ break;
+ case SHMEM_BACKED:
+ self->fd = memfd_create(self->path, 0);
+ break;
+ }
+
+ /* We truncate file to at least 100 pages, tests can modify as needed. */
+ ASSERT_EQ(ftruncate(self->fd, 100 * self->page_size), 0);
+};
+
+FIXTURE_TEARDOWN_PARENT(guard_regions)
+{
+ teardown_sighandler();
+
+ if (variant->backing == ANON_BACKED)
+ return;
+
+ if (self->fd >= 0)
+ close(self->fd);
+
+ if (self->path[0] != '\0')
+ unlink(self->path);
+}
+
+TEST_F(guard_regions, basic)
{
const unsigned long NUM_PAGES = 10;
const unsigned long page_size = self->page_size;
char *ptr;
int i;
- ptr = mmap(NULL, NUM_PAGES * page_size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANON, -1, 0);
+ ptr = mmap_(self, variant, NULL, NUM_PAGES * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* Trivially assert we can touch the first page. */
@@ -228,32 +394,30 @@ TEST_F(guard_pages, basic)
}
/* Assert that operations applied across multiple VMAs work as expected. */
-TEST_F(guard_pages, multi_vma)
+TEST_F(guard_regions, multi_vma)
{
const unsigned long page_size = self->page_size;
char *ptr_region, *ptr, *ptr1, *ptr2, *ptr3;
int i;
/* Reserve a 100 page region over which we can install VMAs. */
- ptr_region = mmap(NULL, 100 * page_size, PROT_NONE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr_region = mmap_(self, variant, NULL, 100 * page_size,
+ PROT_NONE, 0, 0);
ASSERT_NE(ptr_region, MAP_FAILED);
/* Place a VMA of 10 pages size at the start of the region. */
- ptr1 = mmap(ptr_region, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr1 = mmap_(self, variant, ptr_region, 10 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
ASSERT_NE(ptr1, MAP_FAILED);
/* Place a VMA of 5 pages size 50 pages into the region. */
- ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size,
- PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr2 = mmap_(self, variant, &ptr_region[50 * page_size], 5 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
ASSERT_NE(ptr2, MAP_FAILED);
/* Place a VMA of 20 pages size at the end of the region. */
- ptr3 = mmap(&ptr_region[80 * page_size], 20 * page_size,
- PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr3 = mmap_(self, variant, &ptr_region[80 * page_size], 20 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
ASSERT_NE(ptr3, MAP_FAILED);
/* Unmap gaps. */
@@ -323,13 +487,11 @@ TEST_F(guard_pages, multi_vma)
}
/* Now map incompatible VMAs in the gaps. */
- ptr = mmap(&ptr_region[10 * page_size], 40 * page_size,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, &ptr_region[10 * page_size], 40 * page_size,
+ PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED, 0);
ASSERT_NE(ptr, MAP_FAILED);
- ptr = mmap(&ptr_region[55 * page_size], 25 * page_size,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, &ptr_region[55 * page_size], 25 * page_size,
+ PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED, 0);
ASSERT_NE(ptr, MAP_FAILED);
/*
@@ -364,7 +526,7 @@ TEST_F(guard_pages, multi_vma)
* Assert that batched operations performed using process_madvise() work as
* expected.
*/
-TEST_F(guard_pages, process_madvise)
+TEST_F(guard_regions, process_madvise)
{
const unsigned long page_size = self->page_size;
char *ptr_region, *ptr1, *ptr2, *ptr3;
@@ -372,8 +534,8 @@ TEST_F(guard_pages, process_madvise)
struct iovec vec[6];
/* Reserve region to map over. */
- ptr_region = mmap(NULL, 100 * page_size, PROT_NONE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr_region = mmap_(self, variant, NULL, 100 * page_size,
+ PROT_NONE, 0, 0);
ASSERT_NE(ptr_region, MAP_FAILED);
/*
@@ -381,9 +543,8 @@ TEST_F(guard_pages, process_madvise)
* overwrite existing entries and test this code path against
* overwriting existing entries.
*/
- ptr1 = mmap(&ptr_region[page_size], 10 * page_size,
- PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE | MAP_POPULATE, -1, 0);
+ ptr1 = mmap_(self, variant, &ptr_region[page_size], 10 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED | MAP_POPULATE, 0);
ASSERT_NE(ptr1, MAP_FAILED);
/* We want guard markers at start/end of each VMA. */
vec[0].iov_base = ptr1;
@@ -392,9 +553,8 @@ TEST_F(guard_pages, process_madvise)
vec[1].iov_len = page_size;
/* 5 pages offset 50 pages into reserve region. */
- ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size,
- PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr2 = mmap_(self, variant, &ptr_region[50 * page_size], 5 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
ASSERT_NE(ptr2, MAP_FAILED);
vec[2].iov_base = ptr2;
vec[2].iov_len = page_size;
@@ -402,9 +562,8 @@ TEST_F(guard_pages, process_madvise)
vec[3].iov_len = page_size;
/* 20 pages offset 79 pages into reserve region. */
- ptr3 = mmap(&ptr_region[79 * page_size], 20 * page_size,
- PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr3 = mmap_(self, variant, &ptr_region[79 * page_size], 20 * page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
ASSERT_NE(ptr3, MAP_FAILED);
vec[4].iov_base = ptr3;
vec[4].iov_len = page_size;
@@ -459,13 +618,13 @@ TEST_F(guard_pages, process_madvise)
}
/* Assert that unmapping ranges does not leave guard markers behind. */
-TEST_F(guard_pages, munmap)
+TEST_F(guard_regions, munmap)
{
const unsigned long page_size = self->page_size;
char *ptr, *ptr_new1, *ptr_new2;
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* Guard first and last pages. */
@@ -481,11 +640,11 @@ TEST_F(guard_pages, munmap)
ASSERT_EQ(munmap(&ptr[9 * page_size], page_size), 0);
/* Map over them.*/
- ptr_new1 = mmap(ptr, page_size, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr_new1 = mmap_(self, variant, ptr, page_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED, 0);
ASSERT_NE(ptr_new1, MAP_FAILED);
- ptr_new2 = mmap(&ptr[9 * page_size], page_size, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr_new2 = mmap_(self, variant, &ptr[9 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
ASSERT_NE(ptr_new2, MAP_FAILED);
/* Assert that they are now not guarded. */
@@ -497,14 +656,14 @@ TEST_F(guard_pages, munmap)
}
/* Assert that mprotect() operations have no bearing on guard markers. */
-TEST_F(guard_pages, mprotect)
+TEST_F(guard_regions, mprotect)
{
const unsigned long page_size = self->page_size;
char *ptr;
int i;
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* Guard the middle of the range. */
@@ -545,14 +704,14 @@ TEST_F(guard_pages, mprotect)
}
/* Split and merge VMAs and make sure guard pages still behave. */
-TEST_F(guard_pages, split_merge)
+TEST_F(guard_regions, split_merge)
{
const unsigned long page_size = self->page_size;
char *ptr, *ptr_new;
int i;
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* Guard the whole range. */
@@ -593,14 +752,14 @@ TEST_F(guard_pages, split_merge)
}
/* Now map them again - the unmap will have cleared the guards. */
- ptr_new = mmap(&ptr[2 * page_size], page_size, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr_new = mmap_(self, variant, &ptr[2 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
ASSERT_NE(ptr_new, MAP_FAILED);
- ptr_new = mmap(&ptr[5 * page_size], page_size, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr_new = mmap_(self, variant, &ptr[5 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
ASSERT_NE(ptr_new, MAP_FAILED);
- ptr_new = mmap(&ptr[8 * page_size], page_size, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr_new = mmap_(self, variant, &ptr[8 * page_size], page_size,
+ PROT_READ | PROT_WRITE, MAP_FIXED, 0);
ASSERT_NE(ptr_new, MAP_FAILED);
/* Now make sure guard pages are established. */
@@ -676,14 +835,14 @@ TEST_F(guard_pages, split_merge)
}
/* Assert that MADV_DONTNEED does not remove guard markers. */
-TEST_F(guard_pages, dontneed)
+TEST_F(guard_regions, dontneed)
{
const unsigned long page_size = self->page_size;
char *ptr;
int i;
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* Back the whole range. */
@@ -713,8 +872,16 @@ TEST_F(guard_pages, dontneed)
ASSERT_FALSE(result);
} else {
ASSERT_TRUE(result);
- /* Make sure we really did get reset to zero page. */
- ASSERT_EQ(*curr, '\0');
+ switch (variant->backing) {
+ case ANON_BACKED:
+ /* If anon, then we get a zero page. */
+ ASSERT_EQ(*curr, '\0');
+ break;
+ default:
+ /* Otherwise, we get the file data. */
+ ASSERT_EQ(*curr, 'y');
+ break;
+ }
}
/* Now write... */
@@ -729,14 +896,14 @@ TEST_F(guard_pages, dontneed)
}
/* Assert that mlock()'ed pages work correctly with guard markers. */
-TEST_F(guard_pages, mlock)
+TEST_F(guard_regions, mlock)
{
const unsigned long page_size = self->page_size;
char *ptr;
int i;
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* Populate. */
@@ -802,14 +969,14 @@ TEST_F(guard_pages, mlock)
*
* - Moving a mapping alone should retain markers as they are.
*/
-TEST_F(guard_pages, mremap_move)
+TEST_F(guard_regions, mremap_move)
{
const unsigned long page_size = self->page_size;
char *ptr, *ptr_new;
/* Map 5 pages. */
- ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, NULL, 5 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* Place guard markers at both ends of the 5 page span. */
@@ -823,8 +990,7 @@ TEST_F(guard_pages, mremap_move)
/* Map a new region we will move this range into. Doing this ensures
* that we have reserved a range to map into.
*/
- ptr_new = mmap(NULL, 5 * page_size, PROT_NONE, MAP_ANON | MAP_PRIVATE,
- -1, 0);
+ ptr_new = mmap_(self, variant, NULL, 5 * page_size, PROT_NONE, 0, 0);
ASSERT_NE(ptr_new, MAP_FAILED);
ASSERT_EQ(mremap(ptr, 5 * page_size, 5 * page_size,
@@ -849,14 +1015,14 @@ TEST_F(guard_pages, mremap_move)
* will have to remove guard pages manually to fix up (they'd have to do the
* same if it were a PROT_NONE mapping).
*/
-TEST_F(guard_pages, mremap_expand)
+TEST_F(guard_regions, mremap_expand)
{
const unsigned long page_size = self->page_size;
char *ptr, *ptr_new;
/* Map 10 pages... */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* ...But unmap the last 5 so we can ensure we can expand into them. */
ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0);
@@ -880,8 +1046,7 @@ TEST_F(guard_pages, mremap_expand)
ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size]));
/* Reserve a region which we can move to and expand into. */
- ptr_new = mmap(NULL, 20 * page_size, PROT_NONE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr_new = mmap_(self, variant, NULL, 20 * page_size, PROT_NONE, 0, 0);
ASSERT_NE(ptr_new, MAP_FAILED);
/* Now move and expand into it. */
@@ -912,15 +1077,15 @@ TEST_F(guard_pages, mremap_expand)
* if the user were using a PROT_NONE mapping they'd have to manually fix this
* up also so this is OK.
*/
-TEST_F(guard_pages, mremap_shrink)
+TEST_F(guard_regions, mremap_shrink)
{
const unsigned long page_size = self->page_size;
char *ptr;
int i;
/* Map 5 pages. */
- ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, NULL, 5 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* Place guard markers at both ends of the 5 page span. */
@@ -976,7 +1141,7 @@ TEST_F(guard_pages, mremap_shrink)
* Assert that forking a process with VMAs that do not have VM_WIPEONFORK set
* retain guard pages.
*/
-TEST_F(guard_pages, fork)
+TEST_F(guard_regions, fork)
{
const unsigned long page_size = self->page_size;
char *ptr;
@@ -984,8 +1149,8 @@ TEST_F(guard_pages, fork)
int i;
/* Map 10 pages. */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* Establish guard pages in the first 5 pages. */
@@ -1031,16 +1196,19 @@ TEST_F(guard_pages, fork)
* Assert expected behaviour after we fork populated ranges of anonymous memory
* and then guard and unguard the range.
*/
-TEST_F(guard_pages, fork_cow)
+TEST_F(guard_regions, fork_cow)
{
const unsigned long page_size = self->page_size;
char *ptr;
pid_t pid;
int i;
+ if (variant->backing != ANON_BACKED)
+ SKIP(return, "CoW only supported on anon mappings");
+
/* Map 10 pages. */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* Populate range. */
@@ -1102,16 +1270,19 @@ TEST_F(guard_pages, fork_cow)
* Assert that forking a process with VMAs that do have VM_WIPEONFORK set
* behave as expected.
*/
-TEST_F(guard_pages, fork_wipeonfork)
+TEST_F(guard_regions, fork_wipeonfork)
{
const unsigned long page_size = self->page_size;
char *ptr;
pid_t pid;
int i;
+ if (variant->backing != ANON_BACKED)
+ SKIP(return, "Wipe on fork only supported on anon mappings");
+
/* Map 10 pages. */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* Mark wipe on fork. */
@@ -1152,15 +1323,18 @@ TEST_F(guard_pages, fork_wipeonfork)
}
/* Ensure that MADV_FREE retains guard entries as expected. */
-TEST_F(guard_pages, lazyfree)
+TEST_F(guard_regions, lazyfree)
{
const unsigned long page_size = self->page_size;
char *ptr;
int i;
+ if (variant->backing != ANON_BACKED)
+ SKIP(return, "MADV_FREE only supported on anon mappings");
+
/* Map 10 pages. */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* Guard range. */
@@ -1188,14 +1362,14 @@ TEST_F(guard_pages, lazyfree)
}
/* Ensure that MADV_POPULATE_READ, MADV_POPULATE_WRITE behave as expected. */
-TEST_F(guard_pages, populate)
+TEST_F(guard_regions, populate)
{
const unsigned long page_size = self->page_size;
char *ptr;
/* Map 10 pages. */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* Guard range. */
@@ -1214,15 +1388,15 @@ TEST_F(guard_pages, populate)
}
/* Ensure that MADV_COLD, MADV_PAGEOUT do not remove guard markers. */
-TEST_F(guard_pages, cold_pageout)
+TEST_F(guard_regions, cold_pageout)
{
const unsigned long page_size = self->page_size;
char *ptr;
int i;
/* Map 10 pages. */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* Guard range. */
@@ -1260,7 +1434,7 @@ TEST_F(guard_pages, cold_pageout)
}
/* Ensure that guard pages do not break userfaultd. */
-TEST_F(guard_pages, uffd)
+TEST_F(guard_regions, uffd)
{
const unsigned long page_size = self->page_size;
int uffd;
@@ -1273,6 +1447,9 @@ TEST_F(guard_pages, uffd)
struct uffdio_register reg;
struct uffdio_range range;
+ if (!is_anon_backed(variant))
+ SKIP(return, "uffd only works on anon backing");
+
/* Set up uffd. */
uffd = userfaultfd(0);
if (uffd == -1 && errno == EPERM)
@@ -1282,8 +1459,8 @@ TEST_F(guard_pages, uffd)
ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0);
/* Map 10 pages. */
- ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
- MAP_ANON | MAP_PRIVATE, -1, 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
ASSERT_NE(ptr, MAP_FAILED);
/* Register the range with uffd. */
@@ -1309,4 +1486,593 @@ TEST_F(guard_pages, uffd)
ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
}
+/*
+ * Mark a region within a file-backed mapping using MADV_SEQUENTIAL so we
+ * aggressively read-ahead, then install guard regions and assert that it
+ * behaves correctly.
+ *
+ * We page out using MADV_PAGEOUT before checking guard regions so we drop page
+ * cache folios, meaning we maximise the possibility of some broken readahead.
+ */
+TEST_F(guard_regions, madvise_sequential)
+{
+ char *ptr;
+ int i;
+ const unsigned long page_size = self->page_size;
+
+ if (variant->backing == ANON_BACKED)
+ SKIP(return, "MADV_SEQUENTIAL meaningful only for file-backed");
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Establish a pattern of data in the file. */
+ set_pattern(ptr, 10, page_size);
+ ASSERT_TRUE(check_pattern(ptr, 10, page_size));
+
+ /* Mark it as being accessed sequentially. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_SEQUENTIAL), 0);
+
+ /* Mark every other page a guard page. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr2 = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr2, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Now page it out. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0);
+
+ /* Now make sure pages are as expected. */
+ for (i = 0; i < 10; i++) {
+ char *chrp = &ptr[i * page_size];
+
+ if (i % 2 == 0) {
+ bool result = try_read_write_buf(chrp);
+
+ ASSERT_FALSE(result);
+ } else {
+ ASSERT_EQ(*chrp, 'a' + i);
+ }
+ }
+
+ /* Now remove guard pages. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Now make sure all data is as expected. */
+ if (!check_pattern(ptr, 10, page_size))
+ ASSERT_TRUE(false);
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Check that file-backed mappings implement guard regions with MAP_PRIVATE
+ * correctly.
+ */
+TEST_F(guard_regions, map_private)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr_shared, *ptr_private;
+ int i;
+
+ if (variant->backing == ANON_BACKED)
+ SKIP(return, "MAP_PRIVATE test specific to file-backed");
+
+ ptr_shared = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr_shared, MAP_FAILED);
+
+ /* Manually mmap(), do not use mmap_() wrapper so we can force MAP_PRIVATE. */
+ ptr_private = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, self->fd, 0);
+ ASSERT_NE(ptr_private, MAP_FAILED);
+
+ /* Set pattern in shared mapping. */
+ set_pattern(ptr_shared, 10, page_size);
+
+ /* Install guard regions in every other page in the shared mapping. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr = &ptr_shared[i * page_size];
+
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ for (i = 0; i < 10; i++) {
+ /* Every even shared page should be guarded. */
+ ASSERT_EQ(try_read_buf(&ptr_shared[i * page_size]), i % 2 != 0);
+ /* Private mappings should always be readable. */
+ ASSERT_TRUE(try_read_buf(&ptr_private[i * page_size]));
+ }
+
+ /* Install guard regions in every other page in the private mapping. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr = &ptr_private[i * page_size];
+
+ ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ for (i = 0; i < 10; i++) {
+ /* Every even shared page should be guarded. */
+ ASSERT_EQ(try_read_buf(&ptr_shared[i * page_size]), i % 2 != 0);
+ /* Every odd private page should be guarded. */
+ ASSERT_EQ(try_read_buf(&ptr_private[i * page_size]), i % 2 != 0);
+ }
+
+ /* Remove guard regions from shared mapping. */
+ ASSERT_EQ(madvise(ptr_shared, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ for (i = 0; i < 10; i++) {
+ /* Shared mappings should always be readable. */
+ ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size]));
+ /* Every even private page should be guarded. */
+ ASSERT_EQ(try_read_buf(&ptr_private[i * page_size]), i % 2 != 0);
+ }
+
+ /* Remove guard regions from private mapping. */
+ ASSERT_EQ(madvise(ptr_private, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ for (i = 0; i < 10; i++) {
+ /* Shared mappings should always be readable. */
+ ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size]));
+ /* Private mappings should always be readable. */
+ ASSERT_TRUE(try_read_buf(&ptr_private[i * page_size]));
+ }
+
+ /* Ensure patterns are intact. */
+ ASSERT_TRUE(check_pattern(ptr_shared, 10, page_size));
+ ASSERT_TRUE(check_pattern(ptr_private, 10, page_size));
+
+ /* Now write out every other page to MAP_PRIVATE. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr = &ptr_private[i * page_size];
+
+ memset(ptr, 'a' + i, page_size);
+ }
+
+ /*
+ * At this point the mapping is:
+ *
+ * 0123456789
+ * SPSPSPSPSP
+ *
+ * Where S = shared, P = private mappings.
+ */
+
+ /* Now mark the beginning of the mapping guarded. */
+ ASSERT_EQ(madvise(ptr_private, 5 * page_size, MADV_GUARD_INSTALL), 0);
+
+ /*
+ * This renders the mapping:
+ *
+ * 0123456789
+ * xxxxxPSPSP
+ */
+
+ for (i = 0; i < 10; i++) {
+ char *ptr = &ptr_private[i * page_size];
+
+ /* Ensure guard regions as expected. */
+ ASSERT_EQ(try_read_buf(ptr), i >= 5);
+ /* The shared mapping should always succeed. */
+ ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size]));
+ }
+
+ /* Remove the guard regions altogether. */
+ ASSERT_EQ(madvise(ptr_private, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /*
+ *
+ * We now expect the mapping to be:
+ *
+ * 0123456789
+ * SSSSSPSPSP
+ *
+ * As we removed guard regions, the private pages from the first 5 will
+ * have been zapped, so on fault will reestablish the shared mapping.
+ */
+
+ for (i = 0; i < 10; i++) {
+ char *ptr = &ptr_private[i * page_size];
+
+ /*
+ * Assert that shared mappings in the MAP_PRIVATE mapping match
+ * the shared mapping.
+ */
+ if (i < 5 || i % 2 == 0) {
+ char *ptr_s = &ptr_shared[i * page_size];
+
+ ASSERT_EQ(memcmp(ptr, ptr_s, page_size), 0);
+ continue;
+ }
+
+ /* Everything else is a private mapping. */
+ ASSERT_TRUE(is_buf_eq(ptr, page_size, 'a' + i));
+ }
+
+ ASSERT_EQ(munmap(ptr_shared, 10 * page_size), 0);
+ ASSERT_EQ(munmap(ptr_private, 10 * page_size), 0);
+}
+
+/* Test that guard regions established over a read-only mapping function correctly. */
+TEST_F(guard_regions, readonly_file)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing != LOCAL_FILE_BACKED)
+ SKIP(return, "Read-only test specific to file-backed");
+
+ /* Map shared so we can populate with pattern, populate it, unmap. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ set_pattern(ptr, 10, page_size);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+ /* Close the fd so we can re-open read-only. */
+ ASSERT_EQ(close(self->fd), 0);
+
+ /* Re-open read-only. */
+ self->fd = open(self->path, O_RDONLY);
+ ASSERT_NE(self->fd, -1);
+ /* Re-map read-only. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Mark every other page guarded. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_pg = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_pg, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Assert that the guard regions are in place.*/
+ for (i = 0; i < 10; i++) {
+ char *ptr_pg = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_pg), i % 2 != 0);
+ }
+
+ /* Remove guard regions. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Ensure the data is as expected. */
+ ASSERT_TRUE(check_pattern(ptr, 10, page_size));
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+TEST_F(guard_regions, fault_around)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing == ANON_BACKED)
+ SKIP(return, "Fault-around test specific to file-backed");
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Establish a pattern in the backing file. */
+ set_pattern(ptr, 10, page_size);
+
+ /*
+ * Now drop it from the page cache so we get major faults when next we
+ * map it.
+ */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0);
+
+ /* Unmap and remap 'to be sure'. */
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Now make every even page guarded. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Now fault in every odd page. This should trigger fault-around. */
+ for (i = 1; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_buf(ptr_p));
+ }
+
+ /* Finally, ensure that guard regions are intact as expected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0);
+ }
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+TEST_F(guard_regions, truncation)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing == ANON_BACKED)
+ SKIP(return, "Truncation test specific to file-backed");
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /*
+ * Establish a pattern in the backing file, just so there is data
+ * there.
+ */
+ set_pattern(ptr, 10, page_size);
+
+ /* Now make every even page guarded. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Now assert things are as expected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0);
+ }
+
+ /* Now truncate to actually used size (initialised to 100). */
+ ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0);
+
+ /* Here the guard regions will remain intact. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0);
+ }
+
+ /* Now truncate to half the size, then truncate again to the full size. */
+ ASSERT_EQ(ftruncate(self->fd, 5 * page_size), 0);
+ ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0);
+
+ /* Again, guard pages will remain intact. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0);
+ }
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+TEST_F(guard_regions, hole_punch)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing == ANON_BACKED)
+ SKIP(return, "Truncation test specific to file-backed");
+
+ /* Establish pattern in mapping. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ set_pattern(ptr, 10, page_size);
+
+ /* Install a guard region in the middle of the mapping. */
+ ASSERT_EQ(madvise(&ptr[3 * page_size], 4 * page_size,
+ MADV_GUARD_INSTALL), 0);
+
+ /*
+ * The buffer will now be:
+ *
+ * 0123456789
+ * ***xxxx***
+ *
+ * Where * is data and x is the guard region.
+ */
+
+ /* Ensure established. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_p), i < 3 || i >= 7);
+ }
+
+ /* Now hole punch the guarded region. */
+ ASSERT_EQ(madvise(&ptr[3 * page_size], 4 * page_size,
+ MADV_REMOVE), 0);
+
+ /* Ensure guard regions remain. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_p), i < 3 || i >= 7);
+ }
+
+ /* Now remove guard region throughout. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Check that the pattern exists in non-hole punched region. */
+ ASSERT_TRUE(check_pattern(ptr, 3, page_size));
+ /* Check that hole punched region is zeroed. */
+ ASSERT_TRUE(is_buf_eq(&ptr[3 * page_size], 4 * page_size, '\0'));
+ /* Check that the pattern exists in the remainder of the file. */
+ ASSERT_TRUE(check_pattern_offset(ptr, 3, page_size, 7));
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Ensure that a memfd works correctly with guard regions, that we can write
+ * seal it then open the mapping read-only and still establish guard regions
+ * within, remove those guard regions and have everything work correctly.
+ */
+TEST_F(guard_regions, memfd_write_seal)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (variant->backing != SHMEM_BACKED)
+ SKIP(return, "memfd write seal test specific to shmem");
+
+ /* OK, we need a memfd, so close existing one. */
+ ASSERT_EQ(close(self->fd), 0);
+
+ /* Create and truncate memfd. */
+ self->fd = memfd_create("guard_regions_memfd_seals_test",
+ MFD_ALLOW_SEALING);
+ ASSERT_NE(self->fd, -1);
+ ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0);
+
+ /* Map, set pattern, unmap. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+ set_pattern(ptr, 10, page_size);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+
+ /* Write-seal the memfd. */
+ ASSERT_EQ(fcntl(self->fd, F_ADD_SEALS, F_SEAL_WRITE), 0);
+
+ /* Now map the memfd readonly. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Ensure pattern is as expected. */
+ ASSERT_TRUE(check_pattern(ptr, 10, page_size));
+
+ /* Now make every even page guarded. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Now assert things are as expected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0);
+ }
+
+ /* Now remove guard regions. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Ensure pattern is as expected. */
+ ASSERT_TRUE(check_pattern(ptr, 10, page_size));
+
+ /* Ensure write seal intact. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_FALSE(try_write_buf(ptr_p));
+ }
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+
+/*
+ * Since we are now permitted to establish guard regions in read-only anonymous
+ * mappings, for the sake of thoroughness, though it probably has no practical
+ * use, test that guard regions function with a mapping to the anonymous zero
+ * page.
+ */
+TEST_F(guard_regions, anon_zeropage)
+{
+ const unsigned long page_size = self->page_size;
+ char *ptr;
+ int i;
+
+ if (!is_anon_backed(variant))
+ SKIP(return, "anon zero page test specific to anon/shmem");
+
+ /* Obtain a read-only i.e. anon zero page mapping. */
+ ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Now make every even page guarded. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Now assert things are as expected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0);
+ }
+
+ /* Now remove all guard regions. */
+ ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0);
+
+ /* Now assert things are as expected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_TRUE(try_read_buf(ptr_p));
+ }
+
+ /* Ensure zero page...*/
+ ASSERT_TRUE(is_buf_eq(ptr, 10 * page_size, '\0'));
+
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
+/*
+ * Assert that /proc/$pid/pagemap correctly identifies guard region ranges.
+ */
+TEST_F(guard_regions, pagemap)
+{
+ const unsigned long page_size = self->page_size;
+ int proc_fd;
+ char *ptr;
+ int i;
+
+ proc_fd = open("/proc/self/pagemap", O_RDONLY);
+ ASSERT_NE(proc_fd, -1);
+
+ ptr = mmap_(self, variant, NULL, 10 * page_size,
+ PROT_READ | PROT_WRITE, 0, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ /* Read from pagemap, and assert no guard regions are detected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+ unsigned long entry = pagemap_get_entry(proc_fd, ptr_p);
+ unsigned long masked = entry & PM_GUARD_REGION;
+
+ ASSERT_EQ(masked, 0);
+ }
+
+ /* Install a guard region in every other page. */
+ for (i = 0; i < 10; i += 2) {
+ char *ptr_p = &ptr[i * page_size];
+
+ ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0);
+ }
+
+ /* Re-read from pagemap, and assert guard regions are detected. */
+ for (i = 0; i < 10; i++) {
+ char *ptr_p = &ptr[i * page_size];
+ unsigned long entry = pagemap_get_entry(proc_fd, ptr_p);
+ unsigned long masked = entry & PM_GUARD_REGION;
+
+ ASSERT_EQ(masked, i % 2 == 0 ? PM_GUARD_REGION : 0);
+ }
+
+ ASSERT_EQ(close(proc_fd), 0);
+ ASSERT_EQ(munmap(ptr, 10 * page_size), 0);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index 9423ad439a61..21595b20bbc3 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -96,13 +96,17 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
int ret;
if (ftruncate(fd, size)) {
- ksft_test_result_fail("ftruncate() failed\n");
+ if (errno == ENOENT) {
+ skip_test_dodgy_fs("ftruncate()");
+ } else {
+ ksft_test_result_fail("ftruncate() failed (%s)\n", strerror(errno));
+ }
return;
}
if (fallocate(fd, 0, 0, size)) {
if (size == pagesize)
- ksft_test_result_fail("fallocate() failed\n");
+ ksft_test_result_fail("fallocate() failed (%s)\n", strerror(errno));
else
ksft_test_result_skip("need more free huge pages\n");
return;
@@ -112,7 +116,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
if (mem == MAP_FAILED) {
if (size == pagesize || shared)
- ksft_test_result_fail("mmap() failed\n");
+ ksft_test_result_fail("mmap() failed (%s)\n", strerror(errno));
else
ksft_test_result_skip("need more free huge pages\n");
return;
@@ -130,7 +134,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
*/
ret = mprotect(mem, size, PROT_READ);
if (ret) {
- ksft_test_result_fail("mprotect() failed\n");
+ ksft_test_result_fail("mprotect() failed (%s)\n", strerror(errno));
goto munmap;
}
/* FALLTHROUGH */
@@ -165,18 +169,20 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0;
ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
if (ret && errno == EINVAL) {
- ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
+ ksft_test_result_skip("PIN_LONGTERM_TEST_START failed (EINVAL)n");
break;
} else if (ret && errno == EFAULT) {
ksft_test_result(!should_work, "Should have failed\n");
break;
} else if (ret) {
- ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
+ ksft_test_result_fail("PIN_LONGTERM_TEST_START failed (%s)\n",
+ strerror(errno));
break;
}
if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP))
- ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
+ ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed (%s)\n",
+ strerror(errno));
/*
* TODO: if the kernel ever supports long-term R/W pinning on
@@ -202,7 +208,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
/* Skip on errors, as we might just lack kernel support. */
ret = io_uring_queue_init(1, &ring, 0);
if (ret < 0) {
- ksft_test_result_skip("io_uring_queue_init() failed\n");
+ ksft_test_result_skip("io_uring_queue_init() failed (%s)\n",
+ strerror(-ret));
break;
}
/*
@@ -215,13 +222,15 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
/* Only new kernels return EFAULT. */
if (ret && (errno == ENOSPC || errno == EOPNOTSUPP ||
errno == EFAULT)) {
- ksft_test_result(!should_work, "Should have failed\n");
+ ksft_test_result(!should_work, "Should have failed (%s)\n",
+ strerror(errno));
} else if (ret) {
/*
* We might just lack support or have insufficient
* MEMLOCK limits.
*/
- ksft_test_result_skip("io_uring_register_buffers() failed\n");
+ ksft_test_result_skip("io_uring_register_buffers() failed (%s)\n",
+ strerror(-ret));
} else {
ksft_test_result(should_work, "Should have worked\n");
io_uring_unregister_buffers(&ring);
@@ -249,7 +258,7 @@ static void run_with_memfd(test_fn fn, const char *desc)
fd = memfd_create("test", 0);
if (fd < 0) {
- ksft_test_result_fail("memfd_create() failed\n");
+ ksft_test_result_fail("memfd_create() failed (%s)\n", strerror(errno));
return;
}
@@ -266,13 +275,13 @@ static void run_with_tmpfile(test_fn fn, const char *desc)
file = tmpfile();
if (!file) {
- ksft_test_result_fail("tmpfile() failed\n");
+ ksft_test_result_fail("tmpfile() failed (%s)\n", strerror(errno));
return;
}
fd = fileno(file);
if (fd < 0) {
- ksft_test_result_fail("fileno() failed\n");
+ ksft_test_result_fail("fileno() failed (%s)\n", strerror(errno));
goto close;
}
@@ -290,12 +299,12 @@ static void run_with_local_tmpfile(test_fn fn, const char *desc)
fd = mkstemp(filename);
if (fd < 0) {
- ksft_test_result_fail("mkstemp() failed\n");
+ ksft_test_result_fail("mkstemp() failed (%s)\n", strerror(errno));
return;
}
if (unlink(filename)) {
- ksft_test_result_fail("unlink() failed\n");
+ ksft_test_result_fail("unlink() failed (%s)\n", strerror(errno));
goto close;
}
@@ -317,7 +326,7 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
fd = memfd_create("test", flags);
if (fd < 0) {
- ksft_test_result_skip("memfd_create() failed\n");
+ ksft_test_result_skip("memfd_create() failed (%s)\n", strerror(errno));
return;
}
diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
index 11f9bbe7dc22..0b0d4ba1af27 100755
--- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
@@ -23,7 +23,7 @@ fi
if [[ $cgroup2 ]]; then
CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}')
if [[ -z "$CGROUP_ROOT" ]]; then
- CGROUP_ROOT=/dev/cgroup/memory
+ CGROUP_ROOT=$(mktemp -d)
mount -t cgroup2 none $CGROUP_ROOT
do_umount=1
fi
diff --git a/tools/testing/selftests/mm/map_populate.c b/tools/testing/selftests/mm/map_populate.c
index 5c8a53869b1b..9df2636c829b 100644
--- a/tools/testing/selftests/mm/map_populate.c
+++ b/tools/testing/selftests/mm/map_populate.c
@@ -18,6 +18,8 @@
#include <unistd.h>
#include "../kselftest.h"
+#include "vm_util.h"
+
#define MMAP_SZ 4096
#define BUG_ON(condition, description) \
@@ -87,6 +89,9 @@ int main(int argc, char **argv)
BUG_ON(!ftmp, "tmpfile()");
ret = ftruncate(fileno(ftmp), MMAP_SZ);
+ if (ret < 0 && errno == ENOENT) {
+ skip_test_dodgy_fs("ftruncate()");
+ }
BUG_ON(ret, "ftruncate()");
smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE,
diff --git a/tools/testing/selftests/mm/mlock-random-test.c b/tools/testing/selftests/mm/mlock-random-test.c
index 1cd80b0f76c3..b8d7e966f44c 100644
--- a/tools/testing/selftests/mm/mlock-random-test.c
+++ b/tools/testing/selftests/mm/mlock-random-test.c
@@ -161,9 +161,9 @@ static void test_mlock_within_limit(char *p, int alloc_size)
MLOCK_ONFAULT);
if (ret)
- ksft_exit_fail_msg("%s() failure at |%p(%d)| mlock:|%p(%d)|\n",
+ ksft_exit_fail_msg("%s() failure (%s) at |%p(%d)| mlock:|%p(%d)|\n",
is_mlock ? "mlock" : "mlock2",
- p, alloc_size,
+ strerror(errno), p, alloc_size,
p + start_offset, lock_size);
}
diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h
index 4417eaa5cfb7..81e77fa41901 100644
--- a/tools/testing/selftests/mm/mlock2.h
+++ b/tools/testing/selftests/mm/mlock2.h
@@ -6,7 +6,13 @@
static int mlock2_(void *start, size_t len, int flags)
{
- return syscall(__NR_mlock2, start, len, flags);
+ int ret = syscall(__NR_mlock2, start, len, flags);
+
+ if (ret) {
+ errno = ret;
+ return -1;
+ }
+ return 0;
}
static FILE *seek_to_smaps_entry(unsigned long addr)
diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h
index 1bad310d282a..17bf2d1b0192 100644
--- a/tools/testing/selftests/mm/pkey-powerpc.h
+++ b/tools/testing/selftests/mm/pkey-powerpc.h
@@ -3,6 +3,8 @@
#ifndef _PKEYS_POWERPC_H
#define _PKEYS_POWERPC_H
+#include <sys/stat.h>
+
#ifndef SYS_pkey_alloc
# define SYS_pkey_alloc 384
# define SYS_pkey_free 385
@@ -102,8 +104,18 @@ static inline void expect_fault_on_read_execonly_key(void *p1, int pkey)
return;
}
+#define REPEAT_8(s) s s s s s s s s
+#define REPEAT_64(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) \
+ REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s)
+#define REPEAT_512(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) \
+ REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s)
+#define REPEAT_4096(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) \
+ REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s)
+#define REPEAT_16384(s) REPEAT_4096(s) REPEAT_4096(s) \
+ REPEAT_4096(s) REPEAT_4096(s)
+
/* 4-byte instructions * 16384 = 64K page */
-#define __page_o_noops() asm(".rept 16384 ; nop; .endr")
+#define __page_o_noops() asm(REPEAT_16384("nop\n"))
static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
{
diff --git a/tools/testing/selftests/mm/pkey_util.c b/tools/testing/selftests/mm/pkey_util.c
index ca4ad0d44ab2..255b332f7a08 100644
--- a/tools/testing/selftests/mm/pkey_util.c
+++ b/tools/testing/selftests/mm/pkey_util.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
+#define __SANE_USERSPACE_TYPES__
#include <sys/syscall.h>
#include <unistd.h>
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 7cc71d942f83..9aff33b10999 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -187,9 +187,10 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
printf "Not enough huge pages available (%d < %d)\n" \
"$freepgs" "$needpgs"
fi
+ HAVE_HUGEPAGES=1
else
echo "no hugetlbfs support in kernel?"
- exit 1
+ HAVE_HUGEPAGES=0
fi
# filter 64bit architectures
@@ -218,13 +219,20 @@ pretty_name() {
# Usage: run_test [test binary] [arbitrary test arguments...]
run_test() {
if test_selected ${CATEGORY}; then
+ local skip=0
+
# On memory constrainted systems some tests can fail to allocate hugepages.
# perform some cleanup before the test for a higher success rate.
if [ ${CATEGORY} == "thp" -o ${CATEGORY} == "hugetlb" ]; then
- echo 3 > /proc/sys/vm/drop_caches
- sleep 2
- echo 1 > /proc/sys/vm/compact_memory
- sleep 2
+ if [ "${HAVE_HUGEPAGES}" = "1" ]; then
+ echo 3 > /proc/sys/vm/drop_caches
+ sleep 2
+ echo 1 > /proc/sys/vm/compact_memory
+ sleep 2
+ else
+ echo "hugepages not supported" | tap_prefix
+ skip=1
+ fi
fi
local test=$(pretty_name "$*")
@@ -232,8 +240,12 @@ run_test() {
local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -)
printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" | tap_prefix
- ("$@" 2>&1) | tap_prefix
- local ret=${PIPESTATUS[0]}
+ if [ "${skip}" != "1" ]; then
+ ("$@" 2>&1) | tap_prefix
+ local ret=${PIPESTATUS[0]}
+ else
+ local ret=$ksft_skip
+ fi
count_total=$(( count_total + 1 ))
if [ $ret -eq 0 ]; then
count_pass=$(( count_pass + 1 ))
@@ -271,13 +283,15 @@ CATEGORY="hugetlb" run_test ./hugepage-vmemmap
CATEGORY="hugetlb" run_test ./hugetlb-madvise
CATEGORY="hugetlb" run_test ./hugetlb_dio
-nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
-# For this test, we need one and just one huge page
-echo 1 > /proc/sys/vm/nr_hugepages
-CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv
-CATEGORY="hugetlb" run_test ./hugetlb_madv_vs_map
-# Restore the previous number of huge pages, since further tests rely on it
-echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
+if [ "${HAVE_HUGEPAGES}" = "1" ]; then
+ nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
+ # For this test, we need one and just one huge page
+ echo 1 > /proc/sys/vm/nr_hugepages
+ CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv
+ CATEGORY="hugetlb" run_test ./hugetlb_madv_vs_map
+ # Restore the previous number of huge pages, since further tests rely on it
+ echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
+fi
if test_selected "hugetlb"; then
echo "NOTE: These hugetlb tests provide minimal coverage. Use" | tap_prefix
@@ -311,14 +325,35 @@ CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 3
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem-private 20 16
-CATEGORY="userfaultfd" run_test ./uffd-wp-mremap
+# uffd-wp-mremap requires at least one page of each size.
+have_all_size_hugepgs=true
+declare -A nr_size_hugepgs
+for f in /sys/kernel/mm/hugepages/**/nr_hugepages; do
+ old=$(cat $f)
+ nr_size_hugepgs["$f"]="$old"
+ if [ "$old" == 0 ]; then
+ echo 1 > "$f"
+ fi
+ if [ $(cat "$f") == 0 ]; then
+ have_all_size_hugepgs=false
+ break
+ fi
+done
+if $have_all_size_hugepgs; then
+ CATEGORY="userfaultfd" run_test ./uffd-wp-mremap
+else
+ echo "# SKIP ./uffd-wp-mremap"
+fi
#cleanup
+for f in "${!nr_size_hugepgs[@]}"; do
+ echo "${nr_size_hugepgs["$f"]}" > "$f"
+done
echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
CATEGORY="compaction" run_test ./compaction_test
-if command -v sudo &> /dev/null;
+if command -v sudo &> /dev/null && sudo -u nobody ls ./on-fault-limit >/dev/null;
then
CATEGORY="mlock" run_test sudo -u nobody ./on-fault-limit
else
@@ -381,19 +416,21 @@ CATEGORY="mremap" run_test ./mremap_dontunmap
CATEGORY="hmm" run_test bash ./test_hmm.sh smoke
# MADV_GUARD_INSTALL and MADV_GUARD_REMOVE tests
-CATEGORY="madv_guard" run_test ./guard-pages
+CATEGORY="madv_guard" run_test ./guard-regions
# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
CATEGORY="madv_populate" run_test ./madv_populate
if [ -x ./memfd_secret ]
then
-(echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix
+(echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix
CATEGORY="memfd_secret" run_test ./memfd_secret
fi
# KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100
-CATEGORY="ksm" run_test ./ksm_tests -H -s 100
+if [ "${HAVE_HUGEPAGES}" = "1" ]; then
+ CATEGORY="ksm" run_test ./ksm_tests -H -s 100
+fi
# KSM KSM_MERGE_TIME test with size of 100
CATEGORY="ksm" run_test ./ksm_tests -P -s 100
# KSM MADV_MERGEABLE test with 10 identical pages
@@ -442,15 +479,17 @@ CATEGORY="thp" run_test ./transhuge-stress -d 20
# Try to create XFS if not provided
if [ -z "${SPLIT_HUGE_PAGE_TEST_XFS_PATH}" ]; then
- if test_selected "thp"; then
- if grep xfs /proc/filesystems &>/dev/null; then
- XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX)
- SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX)
- truncate -s 314572800 ${XFS_IMG}
- mkfs.xfs -q ${XFS_IMG}
- mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
- MOUNTED_XFS=1
- fi
+ if [ "${HAVE_HUGEPAGES}" = "1" ]; then
+ if test_selected "thp"; then
+ if grep xfs /proc/filesystems &>/dev/null; then
+ XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX)
+ SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX)
+ truncate -s 314572800 ${XFS_IMG}
+ mkfs.xfs -q ${XFS_IMG}
+ mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+ MOUNTED_XFS=1
+ fi
+ fi
fi
fi
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index 3f353f3d070f..aa7400ed0e99 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -5,6 +5,7 @@
*/
#define _GNU_SOURCE
+#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
@@ -14,6 +15,7 @@
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/mount.h>
+#include <sys/param.h>
#include <malloc.h>
#include <stdbool.h>
#include <time.h>
@@ -261,18 +263,32 @@ void split_pte_mapped_thp(void)
close(kpageflags_fd);
}
-void split_file_backed_thp(void)
+void split_file_backed_thp(int order)
{
int status;
int fd;
- ssize_t num_written;
char tmpfs_template[] = "/tmp/thp_split_XXXXXX";
const char *tmpfs_loc = mkdtemp(tmpfs_template);
char testfile[INPUT_MAX];
+ ssize_t num_written, num_read;
+ char *file_buf1, *file_buf2;
uint64_t pgoff_start = 0, pgoff_end = 1024;
+ int i;
ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n");
+ file_buf1 = (char *)malloc(pmd_pagesize);
+ file_buf2 = (char *)malloc(pmd_pagesize);
+
+ if (!file_buf1 || !file_buf2) {
+ ksft_print_msg("cannot allocate file buffers\n");
+ goto out;
+ }
+
+ for (i = 0; i < pmd_pagesize; i++)
+ file_buf1[i] = (char)i;
+ memset(file_buf2, 0, pmd_pagesize);
+
status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m");
if (status)
@@ -281,26 +297,45 @@ void split_file_backed_thp(void)
status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc);
if (status >= INPUT_MAX) {
ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n");
+ goto cleanup;
}
- fd = open(testfile, O_CREAT|O_WRONLY, 0664);
+ fd = open(testfile, O_CREAT|O_RDWR, 0664);
if (fd == -1) {
ksft_perror("Cannot open testing file");
goto cleanup;
}
- /* write something to the file, so a file-backed THP can be allocated */
- num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1);
- close(fd);
+ /* write pmd size data to the file, so a file-backed THP can be allocated */
+ num_written = write(fd, file_buf1, pmd_pagesize);
- if (num_written < 1) {
- ksft_perror("Fail to write data to testing file");
- goto cleanup;
+ if (num_written == -1 || num_written != pmd_pagesize) {
+ ksft_perror("Failed to write data to testing file");
+ goto close_file;
}
/* split the file-backed THP */
- write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, 0);
+ write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, order);
+
+ /* check file content after split */
+ status = lseek(fd, 0, SEEK_SET);
+ if (status == -1) {
+ ksft_perror("Cannot lseek file");
+ goto close_file;
+ }
+
+ num_read = read(fd, file_buf2, num_written);
+ if (num_read == -1 || num_read != num_written) {
+ ksft_perror("Cannot read file content back");
+ goto close_file;
+ }
+
+ if (strncmp(file_buf1, file_buf2, pmd_pagesize) != 0) {
+ ksft_print_msg("File content changed\n");
+ goto close_file;
+ }
+ close(fd);
status = unlink(testfile);
if (status) {
ksft_perror("Cannot remove testing file");
@@ -318,12 +353,15 @@ void split_file_backed_thp(void)
ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno));
ksft_print_msg("Please check dmesg for more information\n");
- ksft_test_result_pass("File-backed THP split test done\n");
+ ksft_test_result_pass("File-backed THP split to order %d test done\n", order);
return;
+close_file:
+ close(fd);
cleanup:
umount(tmpfs_loc);
rmdir(tmpfs_loc);
+out:
ksft_exit_fail_msg("Error occurred\n");
}
@@ -361,6 +399,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
{
size_t i;
int dummy = 0;
+ unsigned char buf[1024];
srand(time(NULL));
@@ -368,11 +407,12 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
if (*fd == -1)
ksft_exit_fail_msg("Failed to create a file at %s\n", testfile);
- for (i = 0; i < fd_size; i++) {
- unsigned char byte = (unsigned char)i;
+ assert(fd_size % sizeof(buf) == 0);
+ for (i = 0; i < sizeof(buf); i++)
+ buf[i] = (unsigned char)i;
+ for (i = 0; i < fd_size; i += sizeof(buf))
+ write(*fd, buf, sizeof(buf));
- write(*fd, &byte, sizeof(byte));
- }
close(*fd);
sync();
*fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
@@ -420,7 +460,8 @@ err_out_unlink:
return -1;
}
-void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc)
+void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc,
+ int order, int offset)
{
int fd;
char *addr;
@@ -438,7 +479,12 @@ void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_l
return;
err = 0;
- write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order);
+ if (offset == -1)
+ write_debugfs(PID_FMT, getpid(), (uint64_t)addr,
+ (uint64_t)addr + fd_size, order);
+ else
+ write_debugfs(PID_FMT, getpid(), (uint64_t)addr,
+ (uint64_t)addr + fd_size, order, offset);
for (i = 0; i < fd_size; i++)
if (*(addr + i) != (char)i) {
@@ -457,9 +503,15 @@ out:
munmap(addr, fd_size);
close(fd);
unlink(testfile);
- if (err)
- ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order);
- ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order);
+ if (offset == -1) {
+ if (err)
+ ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order);
+ ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order);
+ } else {
+ if (err)
+ ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d at in-folio offset %d failed\n", order, offset);
+ ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d at in-folio offset %d passed\n", order, offset);
+ }
}
int main(int argc, char **argv)
@@ -470,6 +522,7 @@ int main(int argc, char **argv)
char fs_loc_template[] = "/tmp/thp_fs_XXXXXX";
const char *fs_loc;
bool created_tmp;
+ int offset;
ksft_print_header();
@@ -481,7 +534,7 @@ int main(int argc, char **argv)
if (argc > 1)
optional_xfs_path = argv[1];
- ksft_set_plan(1+8+2+9);
+ ksft_set_plan(1+8+1+9+9+8*4+2);
pagesize = getpagesize();
pageshift = ffs(pagesize) - 1;
@@ -498,12 +551,19 @@ int main(int argc, char **argv)
split_pmd_thp_to_order(i);
split_pte_mapped_thp();
- split_file_backed_thp();
+ for (i = 0; i < 9; i++)
+ split_file_backed_thp(i);
created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template,
&fs_loc);
for (i = 8; i >= 0; i--)
- split_thp_in_pagecache_to_order(fd_size, i, fs_loc);
+ split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, -1);
+
+ for (i = 0; i < 9; i++)
+ for (offset = 0;
+ offset < pmd_pagesize / pagesize;
+ offset += MAX(pmd_pagesize / pagesize / 4, 1 << i))
+ split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, offset);
cleanup_thp_fs(fs_loc, created_tmp);
ksft_finished();
diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c
index e4370b79b62f..cd5174d735be 100644
--- a/tools/testing/selftests/mm/thuge-gen.c
+++ b/tools/testing/selftests/mm/thuge-gen.c
@@ -127,7 +127,7 @@ void test_mmap(unsigned long size, unsigned flags)
show(size);
ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
- "%s mmap\n", __func__);
+ "%s mmap %lu\n", __func__, size);
if (munmap(map, size * NUM_PAGES))
ksft_exit_fail_msg("%s: unmap %s\n", __func__, strerror(errno));
@@ -165,7 +165,7 @@ void test_shmget(unsigned long size, unsigned flags)
show(size);
ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
- "%s: mmap\n", __func__);
+ "%s: mmap %lu\n", __func__, size);
if (shmdt(map))
ksft_exit_fail_msg("%s: shmdt: %s\n", __func__, strerror(errno));
}
diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c
index 7ad6ba660c7d..a37088a23ffe 100644
--- a/tools/testing/selftests/mm/uffd-common.c
+++ b/tools/testing/selftests/mm/uffd-common.c
@@ -10,7 +10,7 @@
#define BASE_PMD_ADDR ((void *)(1UL << 30))
volatile bool test_uffdio_copy_eexist = true;
-unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
+unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size;
char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap;
int uffd = -1, uffd_flags, finished, *pipefd, test_type;
bool map_shared;
@@ -269,7 +269,7 @@ void uffd_test_ctx_clear(void)
size_t i;
if (pipefd) {
- for (i = 0; i < nr_cpus * 2; ++i) {
+ for (i = 0; i < nr_parallel * 2; ++i) {
if (close(pipefd[i]))
err("close pipefd");
}
@@ -323,7 +323,7 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg)
ret = userfaultfd_open(&features);
if (ret) {
if (errmsg)
- *errmsg = "possible lack of priviledge";
+ *errmsg = "possible lack of privilege";
return ret;
}
@@ -348,7 +348,7 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg)
/*
* After initialization of area_src, we must explicitly release pages
* for area_dst to make sure it's fully empty. Otherwise we could have
- * some area_dst pages be errornously initialized with zero pages,
+ * some area_dst pages be erroneously initialized with zero pages,
* hence we could hit memory corruption later in the test.
*
* One example is when THP is globally enabled, above allocate_area()
@@ -365,10 +365,10 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg)
*/
uffd_test_ops->release_pages(area_dst);
- pipefd = malloc(sizeof(int) * nr_cpus * 2);
+ pipefd = malloc(sizeof(int) * nr_parallel * 2);
if (!pipefd)
err("pipefd");
- for (cpu = 0; cpu < nr_cpus; cpu++)
+ for (cpu = 0; cpu < nr_parallel; cpu++)
if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK))
err("pipe");
diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h
index a70ae10b5f62..7700cbfa3975 100644
--- a/tools/testing/selftests/mm/uffd-common.h
+++ b/tools/testing/selftests/mm/uffd-common.h
@@ -98,7 +98,7 @@ struct uffd_test_case_ops {
};
typedef struct uffd_test_case_ops uffd_test_case_ops_t;
-extern unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
+extern unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size;
extern char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap;
extern int uffd, uffd_flags, finished, *pipefd, test_type;
extern bool map_shared;
diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c
index 944d559ade21..40af7f67c407 100644
--- a/tools/testing/selftests/mm/uffd-stress.c
+++ b/tools/testing/selftests/mm/uffd-stress.c
@@ -180,12 +180,12 @@ static void *background_thread(void *arg)
static int stress(struct uffd_args *args)
{
unsigned long cpu;
- pthread_t locking_threads[nr_cpus];
- pthread_t uffd_threads[nr_cpus];
- pthread_t background_threads[nr_cpus];
+ pthread_t locking_threads[nr_parallel];
+ pthread_t uffd_threads[nr_parallel];
+ pthread_t background_threads[nr_parallel];
finished = 0;
- for (cpu = 0; cpu < nr_cpus; cpu++) {
+ for (cpu = 0; cpu < nr_parallel; cpu++) {
if (pthread_create(&locking_threads[cpu], &attr,
locking_thread, (void *)cpu))
return 1;
@@ -203,7 +203,7 @@ static int stress(struct uffd_args *args)
background_thread, (void *)cpu))
return 1;
}
- for (cpu = 0; cpu < nr_cpus; cpu++)
+ for (cpu = 0; cpu < nr_parallel; cpu++)
if (pthread_join(background_threads[cpu], NULL))
return 1;
@@ -219,11 +219,11 @@ static int stress(struct uffd_args *args)
uffd_test_ops->release_pages(area_src);
finished = 1;
- for (cpu = 0; cpu < nr_cpus; cpu++)
+ for (cpu = 0; cpu < nr_parallel; cpu++)
if (pthread_join(locking_threads[cpu], NULL))
return 1;
- for (cpu = 0; cpu < nr_cpus; cpu++) {
+ for (cpu = 0; cpu < nr_parallel; cpu++) {
char c;
if (bounces & BOUNCE_POLL) {
if (write(pipefd[cpu*2+1], &c, 1) != 1)
@@ -246,11 +246,11 @@ static int userfaultfd_stress(void)
{
void *area;
unsigned long nr;
- struct uffd_args args[nr_cpus];
+ struct uffd_args args[nr_parallel];
uint64_t mem_size = nr_pages * page_size;
int flags = 0;
- memset(args, 0, sizeof(struct uffd_args) * nr_cpus);
+ memset(args, 0, sizeof(struct uffd_args) * nr_parallel);
if (features & UFFD_FEATURE_WP_UNPOPULATED && test_type == TEST_ANON)
flags = UFFD_FEATURE_WP_UNPOPULATED;
@@ -325,7 +325,7 @@ static int userfaultfd_stress(void)
*/
uffd_test_ops->release_pages(area_dst);
- uffd_stats_reset(args, nr_cpus);
+ uffd_stats_reset(args, nr_parallel);
/* bounce pass */
if (stress(args)) {
@@ -359,7 +359,7 @@ static int userfaultfd_stress(void)
swap(area_src_alias, area_dst_alias);
- uffd_stats_report(args, nr_cpus);
+ uffd_stats_report(args, nr_parallel);
}
uffd_test_ctx_clear();
@@ -412,8 +412,8 @@ static void parse_test_type_arg(const char *raw_type)
* feature.
*/
- if (uffd_get_features(&features))
- err("failed to get available features");
+ if (uffd_get_features(&features) && errno == ENOENT)
+ ksft_exit_skip("failed to get available features (%d)\n", errno);
test_uffdio_wp = test_uffdio_wp &&
(features & UFFD_FEATURE_PAGEFAULT_FLAG_WP);
@@ -435,6 +435,7 @@ static void sigalrm(int sig)
int main(int argc, char **argv)
{
+ unsigned long nr_cpus;
size_t bytes;
if (argc < 4)
@@ -454,10 +455,19 @@ int main(int argc, char **argv)
}
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ if (nr_cpus > 32) {
+ /* Don't let calculation below go to zero. */
+ ksft_print_msg("_SC_NPROCESSORS_ONLN (%lu) too large, capping nr_threads to 32\n",
+ nr_cpus);
+ nr_parallel = 32;
+ } else {
+ nr_parallel = nr_cpus;
+ }
- nr_pages_per_cpu = bytes / page_size / nr_cpus;
+ nr_pages_per_cpu = bytes / page_size / nr_parallel;
if (!nr_pages_per_cpu) {
- _err("invalid MiB");
+ _err("pages_per_cpu = 0, cannot test (%lu / %lu / %lu)",
+ bytes, page_size, nr_parallel);
usage();
}
@@ -466,7 +476,7 @@ int main(int argc, char **argv)
_err("invalid bounces");
usage();
}
- nr_pages = nr_pages_per_cpu * nr_cpus;
+ nr_pages = nr_pages_per_cpu * nr_parallel;
printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
nr_pages, nr_pages_per_cpu);
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index 74c8bc02b506..e8fd9011c2a3 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -26,6 +26,8 @@
#define ALIGN_UP(x, align_to) \
((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1)))
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+
struct mem_type {
const char *name;
unsigned int mem_flag;
@@ -196,9 +198,10 @@ uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test,
else
page_size = psize();
- nr_pages = UFFD_TEST_MEM_SIZE / page_size;
+ /* Ensure we have at least 2 pages */
+ nr_pages = MAX(UFFD_TEST_MEM_SIZE, page_size * 2) / page_size;
/* TODO: remove this global var.. it's so ugly */
- nr_cpus = 1;
+ nr_parallel = 1;
/* Initialize test arguments */
args->mem_type = mem_type;
diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c
index 2c4f984bd73c..c2ba7d46c7b4 100644
--- a/tools/testing/selftests/mm/uffd-wp-mremap.c
+++ b/tools/testing/selftests/mm/uffd-wp-mremap.c
@@ -182,7 +182,10 @@ static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb
/* Register range for uffd-wp. */
if (userfaultfd_open(&features)) {
- ksft_test_result_fail("userfaultfd_open() failed\n");
+ if (errno == ENOENT)
+ ksft_test_result_skip("userfaultfd not available\n");
+ else
+ ksft_test_result_fail("userfaultfd_open() failed\n");
goto out;
}
if (uffd_register(uffd, mem, size, false, true, false)) {
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh
index 2c725773cd79..1f92e8caceac 100755
--- a/tools/testing/selftests/mm/va_high_addr_switch.sh
+++ b/tools/testing/selftests/mm/va_high_addr_switch.sh
@@ -41,6 +41,31 @@ check_supported_x86_64()
fi
}
+check_supported_ppc64()
+{
+ local config="/proc/config.gz"
+ [[ -f "${config}" ]] || config="/boot/config-$(uname -r)"
+ [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot"
+
+ local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2)
+ if [[ "${pg_table_levels}" -lt 5 ]]; then
+ echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
+ exit $ksft_skip
+ fi
+
+ local mmu_support=$(grep -m1 "mmu" /proc/cpuinfo | awk '{print $3}')
+ if [[ "$mmu_support" != "radix" ]]; then
+ echo "$0: System does not use Radix MMU, required for 5-level paging"
+ exit $ksft_skip
+ fi
+
+ local hugepages_total=$(awk '/HugePages_Total/ {print $2}' /proc/meminfo)
+ if [[ "${hugepages_total}" -eq 0 ]]; then
+ echo "$0: HugePages are not enabled, required for some tests"
+ exit $ksft_skip
+ fi
+}
+
check_test_requirements()
{
# The test supports x86_64 and powerpc64. We currently have no useful
@@ -50,6 +75,9 @@ check_test_requirements()
"x86_64")
check_supported_x86_64
;;
+ "ppc64le"|"ppc64")
+ check_supported_ppc64
+ ;;
*)
return 0
;;
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index b60ac68a9dc8..6effafdc4d8a 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -5,11 +5,13 @@
#include <err.h>
#include <strings.h> /* ffsl() */
#include <unistd.h> /* _SC_PAGESIZE */
+#include "../kselftest.h"
#define BIT_ULL(nr) (1ULL << (nr))
#define PM_SOFT_DIRTY BIT_ULL(55)
#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
#define PM_UFFD_WP BIT_ULL(57)
+#define PM_GUARD_REGION BIT_ULL(58)
#define PM_FILE BIT_ULL(61)
#define PM_SWAP BIT_ULL(62)
#define PM_PRESENT BIT_ULL(63)
@@ -31,6 +33,23 @@ static inline unsigned int pshift(void)
return __page_shift;
}
+/*
+ * Plan 9 FS has bugs (at least on QEMU) where certain operations fail with
+ * ENOENT on unlinked files. See
+ * https://gitlab.com/qemu-project/qemu/-/issues/103 for some info about such
+ * bugs. There are rumours of NFS implementations with similar bugs.
+ *
+ * Ideally, tests should just detect filesystems known to have such issues and
+ * bail early. But 9pfs has the additional "feature" that it causes fstatfs to
+ * pass through the f_type field from the host filesystem. To avoid having to
+ * scrape /proc/mounts or some other hackery, tests can call this function when
+ * it seems such a bug might have been encountered.
+ */
+static inline void skip_test_dodgy_fs(const char *op_name)
+{
+ ksft_test_result_skip("%s failed with ENOENT. Filesystem might be buggy (9pfs?)\n", op_name);
+}
+
uint64_t pagemap_get_entry(int fd, char *start);
bool pagemap_is_softdirty(int fd, char *start);
bool pagemap_is_swapped(int fd, char *start);
diff --git a/tools/testing/selftests/mseal_system_mappings/.gitignore b/tools/testing/selftests/mseal_system_mappings/.gitignore
new file mode 100644
index 000000000000..319c497a595e
--- /dev/null
+++ b/tools/testing/selftests/mseal_system_mappings/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+sysmap_is_sealed
diff --git a/tools/testing/selftests/mseal_system_mappings/Makefile b/tools/testing/selftests/mseal_system_mappings/Makefile
new file mode 100644
index 000000000000..2b4504e2f52f
--- /dev/null
+++ b/tools/testing/selftests/mseal_system_mappings/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+CFLAGS += -std=c99 -pthread -Wall $(KHDR_INCLUDES)
+
+TEST_GEN_PROGS := sysmap_is_sealed
+
+include ../lib.mk
diff --git a/tools/testing/selftests/mseal_system_mappings/config b/tools/testing/selftests/mseal_system_mappings/config
new file mode 100644
index 000000000000..675cb9f37b86
--- /dev/null
+++ b/tools/testing/selftests/mseal_system_mappings/config
@@ -0,0 +1 @@
+CONFIG_MSEAL_SYSTEM_MAPPINGS=y
diff --git a/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c b/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c
new file mode 100644
index 000000000000..0d2af30c3bf5
--- /dev/null
+++ b/tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * test system mappings are sealed when
+ * KCONFIG_MSEAL_SYSTEM_MAPPINGS=y
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdbool.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+#define VMFLAGS "VmFlags:"
+#define MSEAL_FLAGS "sl"
+#define MAX_LINE_LEN 512
+
+bool has_mapping(char *name, FILE *maps)
+{
+ char line[MAX_LINE_LEN];
+
+ while (fgets(line, sizeof(line), maps)) {
+ if (strstr(line, name))
+ return true;
+ }
+
+ return false;
+}
+
+bool mapping_is_sealed(char *name, FILE *maps)
+{
+ char line[MAX_LINE_LEN];
+
+ while (fgets(line, sizeof(line), maps)) {
+ if (!strncmp(line, VMFLAGS, strlen(VMFLAGS))) {
+ if (strstr(line, MSEAL_FLAGS))
+ return true;
+
+ return false;
+ }
+ }
+
+ return false;
+}
+
+FIXTURE(basic) {
+ FILE *maps;
+};
+
+FIXTURE_SETUP(basic)
+{
+ self->maps = fopen("/proc/self/smaps", "r");
+ if (!self->maps)
+ SKIP(return, "Could not open /proc/self/smap, errno=%d",
+ errno);
+};
+
+FIXTURE_TEARDOWN(basic)
+{
+ if (self->maps)
+ fclose(self->maps);
+};
+
+FIXTURE_VARIANT(basic)
+{
+ char *name;
+ bool sealed;
+};
+
+FIXTURE_VARIANT_ADD(basic, vdso) {
+ .name = "[vdso]",
+ .sealed = true,
+};
+
+FIXTURE_VARIANT_ADD(basic, vvar) {
+ .name = "[vvar]",
+ .sealed = true,
+};
+
+FIXTURE_VARIANT_ADD(basic, vvar_vclock) {
+ .name = "[vvar_vclock]",
+ .sealed = true,
+};
+
+FIXTURE_VARIANT_ADD(basic, sigpage) {
+ .name = "[sigpage]",
+ .sealed = true,
+};
+
+FIXTURE_VARIANT_ADD(basic, vectors) {
+ .name = "[vectors]",
+ .sealed = true,
+};
+
+FIXTURE_VARIANT_ADD(basic, uprobes) {
+ .name = "[uprobes]",
+ .sealed = true,
+};
+
+FIXTURE_VARIANT_ADD(basic, stack) {
+ .name = "[stack]",
+ .sealed = false,
+};
+
+TEST_F(basic, check_sealed)
+{
+ if (!has_mapping(variant->name, self->maps)) {
+ SKIP(return, "could not find the mapping, %s",
+ variant->name);
+ }
+
+ EXPECT_EQ(variant->sealed,
+ mapping_is_sealed(variant->name, self->maps));
+};
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 679542f565a4..532bb732bc6d 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -39,6 +39,7 @@ scm_rights
sk_bind_sendto_listen
sk_connect_zero_addr
sk_so_peek_off
+skf_net_off
socket
so_incoming_cpu
so_netns_cookie
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 6d718b478ed8..70a38f485d4d 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -31,6 +31,7 @@ TEST_PROGS += veth.sh
TEST_PROGS += ioam6.sh
TEST_PROGS += gro.sh
TEST_PROGS += gre_gso.sh
+TEST_PROGS += gre_ipv6_lladdr.sh
TEST_PROGS += cmsg_so_mark.sh
TEST_PROGS += cmsg_so_priority.sh
TEST_PROGS += test_so_rcv.sh
@@ -106,6 +107,8 @@ TEST_PROGS += ipv6_route_update_soft_lockup.sh
TEST_PROGS += busy_poll_test.sh
TEST_GEN_PROGS += proc_net_pktgen
TEST_PROGS += lwt_dst_cache_ref_loop.sh
+TEST_PROGS += skf_net_off.sh
+TEST_GEN_FILES += skf_net_off
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller netlink-dumps
diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh
index d458b45c775b..3ef209cacb8e 100755
--- a/tools/testing/selftests/net/amt.sh
+++ b/tools/testing/selftests/net/amt.sh
@@ -194,15 +194,21 @@ test_remote_ip()
send_mcast_torture4()
{
- ip netns exec "${SOURCE}" bash -c \
- 'cat /dev/urandom | head -c 1G | nc -w 1 -u 239.0.0.1 4001'
+ for i in `seq 10`; do
+ ip netns exec "${SOURCE}" bash -c \
+ 'cat /dev/urandom | head -c 100M | nc -w 1 -u 239.0.0.1 4001'
+ echo -n "."
+ done
}
send_mcast_torture6()
{
- ip netns exec "${SOURCE}" bash -c \
- 'cat /dev/urandom | head -c 1G | nc -w 1 -u ff0e::5:6 6001'
+ for i in `seq 10`; do
+ ip netns exec "${SOURCE}" bash -c \
+ 'cat /dev/urandom | head -c 100M | nc -w 1 -u ff0e::5:6 6001'
+ echo -n "."
+ done
}
check_features()
@@ -278,10 +284,12 @@ wait $pid || err=$?
if [ $err -eq 1 ]; then
ERR=1
fi
+printf "TEST: %-50s" "IPv4 amt traffic forwarding torture"
send_mcast_torture4
-printf "TEST: %-60s [ OK ]\n" "IPv4 amt traffic forwarding torture"
+printf " [ OK ]\n"
+printf "TEST: %-50s" "IPv6 amt traffic forwarding torture"
send_mcast_torture6
-printf "TEST: %-60s [ OK ]\n" "IPv6 amt traffic forwarding torture"
+printf " [ OK ]\n"
sleep 5
if [ "${ERR}" -eq 1 ]; then
echo "Some tests failed." >&2
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index b866bab1d92a..c7cea556b416 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -359,6 +359,23 @@ fib_rule6_test()
"$getnomatch" "iif flowlabel masked redirect to table" \
"iif flowlabel masked no redirect to table"
fi
+
+ $IP link show dev $DEV | grep -q vrf0
+ if [ $? -eq 0 ]; then
+ match="oif vrf0"
+ getmatch="oif $DEV"
+ getnomatch="oif lo"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF oif redirect to table" \
+ "VRF oif no redirect to table"
+
+ match="from $SRC_IP6 iif vrf0"
+ getmatch="from $SRC_IP6 iif $DEV"
+ getnomatch="from $SRC_IP6 iif lo"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF iif redirect to table" \
+ "VRF iif no redirect to table"
+ fi
}
fib_rule6_vrf_test()
@@ -635,6 +652,23 @@ fib_rule4_test()
"$getnomatch" "iif dscp masked redirect to table" \
"iif dscp masked no redirect to table"
fi
+
+ $IP link show dev $DEV | grep -q vrf0
+ if [ $? -eq 0 ]; then
+ match="oif vrf0"
+ getmatch="oif $DEV"
+ getnomatch="oif lo"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF oif redirect to table" \
+ "VRF oif no redirect to table"
+
+ match="from $SRC_IP iif vrf0"
+ getmatch="from $SRC_IP iif $DEV"
+ getnomatch="from $SRC_IP iif lo"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF iif redirect to table" \
+ "VRF iif no redirect to table"
+ fi
}
fib_rule4_vrf_test()
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index 90f8a244ea90..e59fba366a0a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid"
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid 8021p drop_untagged"
NUM_NETIFS=4
CHECK_TC="yes"
source lib.sh
@@ -194,6 +194,100 @@ other_tpid()
tc qdisc del dev $h2 clsact
}
+8021p_do()
+{
+ local should_fail=$1; shift
+ local mac=de:ad:be:ef:13:37
+
+ tc filter add dev $h2 ingress protocol all pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ -q $h1 -c 1 -b $mac -a own "81:00 00:00 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa"
+ sleep 1
+
+ tc -j -s filter show dev $h2 ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err_fail $should_fail $? "802.1p-tagged reception"
+
+ tc filter del dev $h2 ingress pref 1
+}
+
+8021p()
+{
+ RET=0
+
+ tc qdisc add dev $h2 clsact
+ ip link set $h2 promisc on
+
+ # Test that with the default_pvid, 1, packets tagged with VID 0 are
+ # accepted.
+ 8021p_do 0
+
+ # Test that packets tagged with VID 0 are still accepted after changing
+ # the default_pvid.
+ ip link set br0 type bridge vlan_default_pvid 10
+ 8021p_do 0
+
+ log_test "Reception of 802.1p-tagged traffic"
+
+ ip link set $h2 promisc off
+ tc qdisc del dev $h2 clsact
+}
+
+send_untagged_and_8021p()
+{
+ ping_do $h1 192.0.2.2
+ check_fail $?
+
+ 8021p_do 1
+}
+
+drop_untagged()
+{
+ RET=0
+
+ tc qdisc add dev $h2 clsact
+ ip link set $h2 promisc on
+
+ # Test that with no PVID, untagged and 802.1p-tagged traffic is
+ # dropped.
+ ip link set br0 type bridge vlan_default_pvid 1
+
+ # First we reconfigure the default_pvid, 1, as a non-PVID VLAN.
+ bridge vlan add dev $swp1 vid 1 untagged
+ send_untagged_and_8021p
+ bridge vlan add dev $swp1 vid 1 pvid untagged
+
+ # Next we try to delete VID 1 altogether
+ bridge vlan del dev $swp1 vid 1
+ send_untagged_and_8021p
+ bridge vlan add dev $swp1 vid 1 pvid untagged
+
+ # Set up the bridge without a default_pvid, then check that the 8021q
+ # module, when the bridge port goes down and then up again, does not
+ # accidentally re-enable untagged packet reception.
+ ip link set br0 type bridge vlan_default_pvid 0
+ ip link set $swp1 down
+ ip link set $swp1 up
+ setup_wait
+ send_untagged_and_8021p
+
+ # Remove swp1 as a bridge port and let it rejoin the bridge while it
+ # has no default_pvid.
+ ip link set $swp1 nomaster
+ ip link set $swp1 master br0
+ send_untagged_and_8021p
+
+ # Restore settings
+ ip link set br0 type bridge vlan_default_pvid 1
+
+ log_test "Dropping of untagged and 802.1p-tagged traffic with no PVID"
+
+ ip link set $h2 promisc off
+ tc qdisc del dev $h2 clsact
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/tc_taprio.sh b/tools/testing/selftests/net/forwarding/tc_taprio.sh
new file mode 100755
index 000000000000..8992aeabfe0b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_taprio.sh
@@ -0,0 +1,421 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS=" \
+ test_clock_jump_backward \
+ test_taprio_after_ptp \
+ test_max_sdu \
+ test_clock_jump_backward_forward \
+"
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+source tsn_lib.sh
+
+require_command python3
+
+# The test assumes the usual topology from the README, where h1 is connected to
+# swp1, h2 to swp2, and swp1 and swp2 are together in a bridge.
+# Additional assumption: h1 and h2 use the same PHC, and so do swp1 and swp2.
+# By synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized to
+# swp1 (and both to CLOCK_REALTIME).
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+UDS_ADDRESS_H1="/var/run/ptp4l_h1"
+UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1"
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# Tunables
+NUM_PKTS=100
+STREAM_VID=10
+STREAM_PRIO_1=6
+STREAM_PRIO_2=5
+STREAM_PRIO_3=4
+# PTP uses TC 0
+ALL_GATES=$((1 << 0 | 1 << STREAM_PRIO_1 | 1 << STREAM_PRIO_2))
+# Use a conservative cycle of 10 ms to allow the test to still pass when the
+# kernel has some extra overhead like lockdep etc
+CYCLE_TIME_NS=10000000
+# Create two Gate Control List entries, one OPEN and one CLOSE, of equal
+# durations
+GATE_DURATION_NS=$((CYCLE_TIME_NS / 2))
+# Give 2/3 of the cycle time to user space and 1/3 to the kernel
+FUDGE_FACTOR=$((CYCLE_TIME_NS / 3))
+# Shift the isochron base time by half the gate time, so that packets are
+# always received by swp1 close to the middle of the time slot, to minimize
+# inaccuracies due to network sync
+SHIFT_TIME_NS=$((GATE_DURATION_NS / 2))
+
+path_delay=
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+switch_create()
+{
+ local h2_mac_addr=$(mac_get $h2)
+
+ ip link set $swp1 up
+ ip link set $swp2 up
+
+ ip link add br0 type bridge vlan_filtering 1
+ ip link set $swp1 master br0
+ ip link set $swp2 master br0
+ ip link set br0 up
+
+ bridge vlan add dev $swp2 vid $STREAM_VID
+ bridge vlan add dev $swp1 vid $STREAM_VID
+ bridge fdb add dev $swp2 \
+ $h2_mac_addr vlan $STREAM_VID static master
+}
+
+switch_destroy()
+{
+ ip link del br0
+}
+
+ptp_setup()
+{
+ # Set up swp1 as a master PHC for h1, synchronized to the local
+ # CLOCK_REALTIME.
+ phc2sys_start $UDS_ADDRESS_SWP1
+ ptp4l_start $h1 true $UDS_ADDRESS_H1
+ ptp4l_start $swp1 false $UDS_ADDRESS_SWP1
+}
+
+ptp_cleanup()
+{
+ ptp4l_stop $swp1
+ ptp4l_stop $h1
+ phc2sys_stop
+}
+
+txtime_setup()
+{
+ local if_name=$1
+
+ tc qdisc add dev $if_name clsact
+ # Classify PTP on TC 7 and isochron on TC 6
+ tc filter add dev $if_name egress protocol 0x88f7 \
+ flower action skbedit priority 7
+ tc filter add dev $if_name egress protocol 802.1Q \
+ flower vlan_ethtype 0xdead action skbedit priority 6
+ tc qdisc add dev $if_name handle 100: parent root mqprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+ map 0 1 2 3 4 5 6 7 \
+ hw 1
+ # Set up TC 5, 6, 7 for SO_TXTIME. tc-mqprio queues count from 1.
+ tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_1 + 1)) etf \
+ clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+ tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_2 + 1)) etf \
+ clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+ tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_3 + 1)) etf \
+ clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+}
+
+txtime_cleanup()
+{
+ local if_name=$1
+
+ tc qdisc del dev $if_name clsact
+ tc qdisc del dev $if_name root
+}
+
+taprio_replace()
+{
+ local if_name="$1"; shift
+ local extra_args="$1"; shift
+
+ # STREAM_PRIO_1 always has an open gate.
+ # STREAM_PRIO_2 has a gate open for GATE_DURATION_NS (half the cycle time)
+ # STREAM_PRIO_3 always has a closed gate.
+ tc qdisc replace dev $if_name root stab overhead 24 taprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+ map 0 1 2 3 4 5 6 7 \
+ sched-entry S $(printf "%x" $ALL_GATES) $GATE_DURATION_NS \
+ sched-entry S $(printf "%x" $((ALL_GATES & ~(1 << STREAM_PRIO_2)))) $GATE_DURATION_NS \
+ base-time 0 flags 0x2 $extra_args
+ taprio_wait_for_admin $if_name
+}
+
+taprio_cleanup()
+{
+ local if_name=$1
+
+ tc qdisc del dev $if_name root
+}
+
+probe_path_delay()
+{
+ local isochron_dat="$(mktemp)"
+ local received
+
+ log_info "Probing path delay"
+
+ isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" 0 \
+ "$CYCLE_TIME_NS" "" "" "$NUM_PKTS" \
+ "$STREAM_VID" "$STREAM_PRIO_1" "" "$isochron_dat"
+
+ received=$(isochron_report_num_received "$isochron_dat")
+ if [ "$received" != "$NUM_PKTS" ]; then
+ echo "Cannot establish basic data path between $h1 and $h2"
+ exit $ksft_fail
+ fi
+
+ printf "pdelay = {}\n" > isochron_data.py
+ isochron report --input-file "$isochron_dat" \
+ --printf-format "pdelay[%u] = %d - %d\n" \
+ --printf-args "qRT" \
+ >> isochron_data.py
+ cat <<-'EOF' > isochron_postprocess.py
+ #!/usr/bin/env python3
+
+ from isochron_data import pdelay
+ import numpy as np
+
+ w = np.array(list(pdelay.values()))
+ print("{}".format(np.max(w)))
+ EOF
+ path_delay=$(python3 ./isochron_postprocess.py)
+
+ log_info "Path delay from $h1 to $h2 estimated at $path_delay ns"
+
+ if [ "$path_delay" -gt "$GATE_DURATION_NS" ]; then
+ echo "Path delay larger than gate duration, aborting"
+ exit $ksft_fail
+ fi
+
+ rm -f ./isochron_data.py 2> /dev/null
+ rm -f ./isochron_postprocess.py 2> /dev/null
+ rm -f "$isochron_dat" 2> /dev/null
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+
+ txtime_setup $h1
+
+ # Temporarily set up PTP just to probe the end-to-end path delay.
+ ptp_setup
+ probe_path_delay
+ ptp_cleanup
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ isochron_recv_stop
+ txtime_cleanup $h1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+run_test()
+{
+ local base_time=$1; shift
+ local stream_prio=$1; shift
+ local expected_delay=$1; shift
+ local should_fail=$1; shift
+ local test_name=$1; shift
+ local isochron_dat="$(mktemp)"
+ local received
+ local median_delay
+
+ RET=0
+
+ # Set the shift time equal to the cycle time, which effectively
+ # cancels the default advance time. Packets won't be sent early in
+ # software, which ensures that they won't prematurely enter through
+ # the open gate in __test_out_of_band(). Also, the gate is open for
+ # long enough that this won't cause a problem in __test_in_band().
+ isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" "$base_time" \
+ "$CYCLE_TIME_NS" "$SHIFT_TIME_NS" "$GATE_DURATION_NS" \
+ "$NUM_PKTS" "$STREAM_VID" "$stream_prio" "" "$isochron_dat"
+
+ received=$(isochron_report_num_received "$isochron_dat")
+ [ "$received" = "$NUM_PKTS" ]
+ check_err_fail $should_fail $? "Reception of $NUM_PKTS packets"
+
+ if [ $should_fail = 0 ] && [ "$received" = "$NUM_PKTS" ]; then
+ printf "pdelay = {}\n" > isochron_data.py
+ isochron report --input-file "$isochron_dat" \
+ --printf-format "pdelay[%u] = %d - %d\n" \
+ --printf-args "qRT" \
+ >> isochron_data.py
+ cat <<-'EOF' > isochron_postprocess.py
+ #!/usr/bin/env python3
+
+ from isochron_data import pdelay
+ import numpy as np
+
+ w = np.array(list(pdelay.values()))
+ print("{}".format(int(np.median(w))))
+ EOF
+ median_delay=$(python3 ./isochron_postprocess.py)
+
+ # If the condition below is true, packets were delayed by a closed gate
+ [ "$median_delay" -gt $((path_delay + expected_delay)) ]
+ check_fail $? "Median delay $median_delay is greater than expected delay $expected_delay plus path delay $path_delay"
+
+ # If the condition below is true, packets were sent expecting them to
+ # hit a closed gate in the switch, but were not delayed
+ [ "$expected_delay" -gt 0 ] && [ "$median_delay" -lt "$expected_delay" ]
+ check_fail $? "Median delay $median_delay is less than expected delay $expected_delay"
+ fi
+
+ log_test "$test_name"
+
+ rm -f ./isochron_data.py 2> /dev/null
+ rm -f ./isochron_postprocess.py 2> /dev/null
+ rm -f "$isochron_dat" 2> /dev/null
+}
+
+__test_always_open()
+{
+ run_test 0.000000000 $STREAM_PRIO_1 0 0 "Gate always open"
+}
+
+__test_always_closed()
+{
+ run_test 0.000000000 $STREAM_PRIO_3 0 1 "Gate always closed"
+}
+
+__test_in_band()
+{
+ # Send packets in-band with the OPEN gate entry
+ run_test 0.000000000 $STREAM_PRIO_2 0 0 "In band with gate"
+}
+
+__test_out_of_band()
+{
+ # Send packets in-band with the CLOSE gate entry
+ run_test 0.005000000 $STREAM_PRIO_2 \
+ $((GATE_DURATION_NS - SHIFT_TIME_NS)) 0 \
+ "Out of band with gate"
+}
+
+run_subtests()
+{
+ __test_always_open
+ __test_always_closed
+ __test_in_band
+ __test_out_of_band
+}
+
+test_taprio_after_ptp()
+{
+ log_info "Setting up taprio after PTP"
+ ptp_setup
+ taprio_replace $swp2
+ run_subtests
+ taprio_cleanup $swp2
+ ptp_cleanup
+}
+
+__test_under_max_sdu()
+{
+ # Limit max-sdu for STREAM_PRIO_1
+ taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 100 0"
+ run_test 0.000000000 $STREAM_PRIO_1 0 0 "Under maximum SDU"
+}
+
+__test_over_max_sdu()
+{
+ # Limit max-sdu for STREAM_PRIO_1
+ taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 20 0"
+ run_test 0.000000000 $STREAM_PRIO_1 0 1 "Over maximum SDU"
+}
+
+test_max_sdu()
+{
+ ptp_setup
+ __test_under_max_sdu
+ __test_over_max_sdu
+ taprio_cleanup $swp2
+ ptp_cleanup
+}
+
+# Perform a clock jump in the past without synchronization running, so that the
+# time base remains where it was set by phc_ctl.
+test_clock_jump_backward()
+{
+ # This is a more complex schedule specifically crafted in a way that
+ # has been problematic on NXP LS1028A. Not much to test with it other
+ # than the fact that it passes traffic.
+ tc qdisc replace dev $swp2 root stab overhead 24 taprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \
+ base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \
+ sched-entry S 20 300000 sched-entry S 48 200000 \
+ sched-entry S 20 300000 sched-entry S 83 200000 \
+ sched-entry S 40 300000 sched-entry S 00 200000 flags 2
+
+ log_info "Forcing a backward clock jump"
+ phc_ctl $swp1 set 0
+
+ ping_test $h1 192.0.2.2
+ taprio_cleanup $swp2
+}
+
+# Test that taprio tolerates clock jumps.
+# Since ptp4l and phc2sys are running, it is expected for the time to
+# eventually recover (through yet another clock jump). Isochron waits
+# until that is the case.
+test_clock_jump_backward_forward()
+{
+ log_info "Forcing a backward and a forward clock jump"
+ taprio_replace $swp2
+ phc_ctl $swp1 set 0
+ ptp_setup
+ ping_test $h1 192.0.2.2
+ run_subtests
+ ptp_cleanup
+ taprio_cleanup $swp2
+}
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_test_skip "Could not test offloaded functionality"
+ exit $EXIT_STATUS
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh
index b91bcd8008a9..08c044ff6689 100644
--- a/tools/testing/selftests/net/forwarding/tsn_lib.sh
+++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh
@@ -2,6 +2,8 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright 2021-2022 NXP
+tc_testing_scripts_dir=$(dirname $0)/../../tc-testing/scripts
+
REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes}
REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes}
@@ -18,6 +20,7 @@ fi
if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then
require_command phc2sys
require_command ptp4l
+ require_command phc_ctl
fi
phc2sys_start()
@@ -182,6 +185,7 @@ isochron_do()
local base_time=$1; shift
local cycle_time=$1; shift
local shift_time=$1; shift
+ local window_size=$1; shift
local num_pkts=$1; shift
local vid=$1; shift
local priority=$1; shift
@@ -212,6 +216,10 @@ isochron_do()
extra_args="${extra_args} --shift-time=${shift_time}"
fi
+ if ! [ -z "${window_size}" ]; then
+ extra_args="${extra_args} --window-size=${window_size}"
+ fi
+
if [ "${use_l2}" = "true" ]; then
extra_args="${extra_args} --l2 --etype=0xdead ${vid}"
receiver_extra_args="--l2 --etype=0xdead"
@@ -247,3 +255,21 @@ isochron_do()
cpufreq_restore ${ISOCHRON_CPU}
}
+
+isochron_report_num_received()
+{
+ local isochron_dat=$1; shift
+
+ # Count all received packets by looking at the non-zero RX timestamps
+ isochron report \
+ --input-file "${isochron_dat}" \
+ --printf-format "%u\n" --printf-args "R" | \
+ grep -w -v '0' | wc -l
+}
+
+taprio_wait_for_admin()
+{
+ local if_name="$1"; shift
+
+ "$tc_testing_scripts_dir/taprio_wait_for_admin.sh" "$(which tc)" "$if_name"
+}
diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
new file mode 100755
index 000000000000..5b34f6e1f831
--- /dev/null
+++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
@@ -0,0 +1,177 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./lib.sh
+
+PAUSE_ON_FAIL="no"
+
+# The trap function handler
+#
+exit_cleanup_all()
+{
+ cleanup_all_ns
+
+ exit "${EXIT_STATUS}"
+}
+
+# Add fake IPv4 and IPv6 networks on the loopback device, to be used as
+# underlay by future GRE devices.
+#
+setup_basenet()
+{
+ ip -netns "${NS0}" link set dev lo up
+ ip -netns "${NS0}" address add dev lo 192.0.2.10/24
+ ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad
+}
+
+# Check if network device has an IPv6 link-local address assigned.
+#
+# Parameters:
+#
+# * $1: The network device to test
+# * $2: An extra regular expression that should be matched (to verify the
+# presence of extra attributes)
+# * $3: The expected return code from grep (to allow checking the absence of
+# a link-local address)
+# * $4: The user visible name for the scenario being tested
+#
+check_ipv6_ll_addr()
+{
+ local DEV="$1"
+ local EXTRA_MATCH="$2"
+ local XRET="$3"
+ local MSG="$4"
+
+ RET=0
+ set +e
+ ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}"
+ check_err_fail "${XRET}" $? ""
+ log_test "${MSG}"
+ set -e
+}
+
+# Create a GRE device and verify that it gets an IPv6 link-local address as
+# expected.
+#
+# Parameters:
+#
+# * $1: The device type (gre, ip6gre, gretap or ip6gretap)
+# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any")
+# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any")
+# * $4: The IPv6 interface identifier generation mode to use for the GRE
+# device (eui64, none, stable-privacy or random).
+#
+test_gre_device()
+{
+ local GRE_TYPE="$1"
+ local LOCAL_IP="$2"
+ local REMOTE_IP="$3"
+ local MODE="$4"
+ local ADDR_GEN_MODE
+ local MATCH_REGEXP
+ local MSG
+
+ ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}"
+
+ case "${MODE}" in
+ "eui64")
+ ADDR_GEN_MODE=0
+ MATCH_REGEXP=""
+ MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=0
+ ;;
+ "none")
+ ADDR_GEN_MODE=1
+ MATCH_REGEXP=""
+ MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=1 # No link-local address should be generated
+ ;;
+ "stable-privacy")
+ ADDR_GEN_MODE=2
+ MATCH_REGEXP="stable-privacy"
+ MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=0
+ # Initialise stable_secret (required for stable-privacy mode)
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd"
+ ;;
+ "random")
+ ADDR_GEN_MODE=3
+ MATCH_REGEXP="stable-privacy"
+ MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=0
+ ;;
+ esac
+
+ # Check that IPv6 link-local address is generated when device goes up
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
+ ip -netns "${NS0}" link set dev gretest up
+ check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}"
+
+ # Now disable link-local address generation
+ ip -netns "${NS0}" link set dev gretest down
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1
+ ip -netns "${NS0}" link set dev gretest up
+
+ # Check that link-local address generation works when re-enabled while
+ # the device is already up
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
+ check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}"
+
+ ip -netns "${NS0}" link del dev gretest
+}
+
+test_gre4()
+{
+ local GRE_TYPE
+ local MODE
+
+ for GRE_TYPE in "gre" "gretap"; do
+ printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n"
+
+ for MODE in "eui64" "none" "stable-privacy" "random"; do
+ test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}"
+ done
+ done
+}
+
+test_gre6()
+{
+ local GRE_TYPE
+ local MODE
+
+ for GRE_TYPE in "ip6gre" "ip6gretap"; do
+ printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n"
+
+ for MODE in "eui64" "none" "stable-privacy" "random"; do
+ test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}"
+ done
+ done
+}
+
+usage()
+{
+ echo "Usage: $0 [-p]"
+ exit 1
+}
+
+while getopts :p o
+do
+ case $o in
+ p) PAUSE_ON_FAIL="yes";;
+ *) usage;;
+ esac
+done
+
+setup_ns NS0
+
+set -e
+trap exit_cleanup_all EXIT
+
+setup_basenet
+
+test_gre4
+test_gre6
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 975be4fdbcdb..701905eeff66 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -222,6 +222,31 @@ setup_ns()
NS_LIST+=("${ns_list[@]}")
}
+# Create netdevsim with given id and net namespace.
+create_netdevsim() {
+ local id="$1"
+ local ns="$2"
+
+ modprobe netdevsim &> /dev/null
+ udevadm settle
+
+ echo "$id 1" | ip netns exec $ns tee /sys/bus/netdevsim/new_device >/dev/null
+ local dev=$(ip netns exec $ns ls /sys/bus/netdevsim/devices/netdevsim$id/net)
+ ip -netns $ns link set dev $dev name nsim$id
+ ip -netns $ns link set dev nsim$id up
+
+ echo nsim$id
+}
+
+# Remove netdevsim with given id.
+cleanup_netdevsim() {
+ local id="$1"
+
+ if [ -d "/sys/bus/netdevsim/devices/netdevsim$id/net" ]; then
+ echo "$id" > /sys/bus/netdevsim/del_device
+ fi
+}
+
tc_rule_stats_get()
{
local dev=$1; shift
diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore
index 49daae73c41e..833279fb34e2 100644
--- a/tools/testing/selftests/net/mptcp/.gitignore
+++ b/tools/testing/selftests/net/mptcp/.gitignore
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
mptcp_connect
+mptcp_diag
mptcp_inq
mptcp_sockopt
pm_nl_ctl
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 4f55477ffe08..e7a75341f0f3 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -206,9 +206,8 @@ chk_dump_one()
local token
local msg
- ss_token="$(ss -inmHMN $ns | grep 'token:' |\
- head -n 1 |\
- sed 's/.*token:\([0-9a-f]*\).*/\1/')"
+ ss_token="$(ss -inmHMN $ns |
+ mptcp_lib_get_info_value "token" "token")"
token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\
awk -F':[ \t]+' '/^token/ {print $2}')"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index d240d02fa443..c83a8b47bbdf 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -1270,7 +1270,7 @@ int main_loop(void)
if (cfg_input && cfg_sockopt_types.mptfo) {
fd_in = open(cfg_input, O_RDONLY);
- if (fd < 0)
+ if (fd_in < 0)
xerror("can't open %s:%d", cfg_input, errno);
}
@@ -1293,13 +1293,13 @@ again:
if (cfg_input && !cfg_sockopt_types.mptfo) {
fd_in = open(cfg_input, O_RDONLY);
- if (fd < 0)
+ if (fd_in < 0)
xerror("can't open %s:%d", cfg_input, errno);
}
ret = copyfd_io(fd_in, fd, 1, 0, &winfo);
if (ret)
- return ret;
+ goto out;
if (cfg_truncate > 0) {
shutdown(fd, SHUT_WR);
@@ -1320,7 +1320,10 @@ again:
close(fd);
}
- return 0;
+out:
+ if (cfg_input)
+ close(fd_in);
+ return ret;
}
int parse_proto(const char *proto)
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 13a3b68181ee..befa66f5a366 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1441,6 +1441,15 @@ chk_join_nr()
fi
fi
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "0" ]; then
+ rc=${KSFT_FAIL}
+ print_check "synack HMAC"
+ fail_test "got $count JOIN[s] synack HMAC failure expected 0"
+ fi
+
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
if [ -z "$count" ]; then
rc=${KSFT_SKIP}
@@ -1450,6 +1459,15 @@ chk_join_nr()
fail_test "got $count JOIN[s] ack rx expected $ack_nr"
fi
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "0" ]; then
+ rc=${KSFT_FAIL}
+ print_check "ack HMAC"
+ fail_test "got $count JOIN[s] ack HMAC failure expected 0"
+ fi
+
print_results "join Rx" ${rc}
join_syn_tx="${join_syn_tx:-${syn_nr}}" \
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index 47088b005390..1f5979c1510c 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -27,7 +27,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add reload net_port_proto_match"
+BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
@@ -387,6 +387,25 @@ race_repeat 0
perf_duration 0
"
+
+TYPE_avx2_mismatch="
+display avx2 false match
+type_spec inet_proto . ipv6_addr
+chain_spec meta l4proto . ip6 daddr
+dst proto addr6
+src
+start 1
+count 1
+src_delta 1
+tools ping
+proto icmp6
+
+race_repeat 0
+
+perf_duration 0
+"
+
+
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -1629,6 +1648,24 @@ test_bug_net_port_proto_match() {
nft flush ruleset
}
+test_bug_avx2_mismatch()
+{
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ local a1="fe80:dead:01ff:0a02:0b03:6007:8009:a001"
+ local a2="fe80:dead:01fe:0a02:0b03:6007:8009:a001"
+
+ nft "add element inet filter test { icmpv6 . $a1 }"
+
+ dst_addr6="$a2"
+ send_icmp6
+
+ if [ "$(count_packets)" -gt "0" ]; then
+ err "False match for $a2"
+ return 1
+ fi
+}
+
test_reported_issues() {
eval test_bug_"${subtest}"
}
diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh
index 0be1905d1f2f..38871bdef67f 100755
--- a/tools/testing/selftests/net/netns-name.sh
+++ b/tools/testing/selftests/net/netns-name.sh
@@ -7,10 +7,12 @@ set -o pipefail
DEV=dummy-dev0
DEV2=dummy-dev1
ALT_NAME=some-alt-name
+NSIM_ADDR=2025
RET_CODE=0
cleanup() {
+ cleanup_netdevsim $NSIM_ADDR
cleanup_ns $NS $test_ns
}
@@ -25,12 +27,15 @@ setup_ns NS test_ns
#
# Test basic move without a rename
+# Use netdevsim because it has extra asserts for notifiers.
#
-ip -netns $NS link add name $DEV type dummy || fail
-ip -netns $NS link set dev $DEV netns $test_ns ||
+
+nsim=$(create_netdevsim $NSIM_ADDR $NS)
+ip -netns $NS link set dev $nsim netns $test_ns ||
fail "Can't perform a netns move"
-ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move"
-ip -netns $test_ns link del $DEV || fail
+ip -netns $test_ns link show dev $nsim >> /dev/null ||
+ fail "Device not found after move"
+cleanup_netdevsim $NSIM_ADDR
#
# Test move with a conflict
diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py
index 80950888800b..e9ad5e88da97 100755
--- a/tools/testing/selftests/net/rtnetlink.py
+++ b/tools/testing/selftests/net/rtnetlink.py
@@ -12,10 +12,10 @@ def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None:
At least the loopback interface should have this address.
"""
- addresses = rtnl.getmaddrs({"ifa-family": socket.AF_INET}, dump=True)
+ addresses = rtnl.getmulticast({"ifa-family": socket.AF_INET}, dump=True)
all_host_multicasts = [
- addr for addr in addresses if addr['ifa-multicast'] == IPV4_ALL_HOSTS_MULTICAST
+ addr for addr in addresses if addr['multicast'] == IPV4_ALL_HOSTS_MULTICAST
]
ksft_ge(len(all_host_multicasts), 1,
diff --git a/tools/testing/selftests/net/skf_net_off.c b/tools/testing/selftests/net/skf_net_off.c
new file mode 100644
index 000000000000..1fdf61d6cd7f
--- /dev/null
+++ b/tools/testing/selftests/net/skf_net_off.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Open a tun device.
+ *
+ * [modifications: use IFF_NAPI_FRAGS, add sk filter]
+ *
+ * Expects the device to have been configured previously, e.g.:
+ * sudo ip tuntap add name tap1 mode tap
+ * sudo ip link set tap1 up
+ * sudo ip link set dev tap1 addr 02:00:00:00:00:01
+ * sudo ip -6 addr add fdab::1 peer fdab::2 dev tap1 nodad
+ *
+ * And to avoid premature pskb_may_pull:
+ *
+ * sudo ethtool -K tap1 gro off
+ * sudo bash -c 'echo 0 > /proc/sys/net/ipv4/ip_early_demux'
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if.h>
+#include <linux/if_packet.h>
+#include <linux/if_tun.h>
+#include <linux/ipv6.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/poll.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+static bool cfg_do_filter;
+static bool cfg_do_frags;
+static int cfg_dst_port = 8000;
+static char *cfg_ifname;
+
+static int tun_open(const char *tun_name)
+{
+ struct ifreq ifr = {0};
+ int fd, ret;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (fd == -1)
+ error(1, errno, "open /dev/net/tun");
+
+ ifr.ifr_flags = IFF_TAP;
+ if (cfg_do_frags)
+ ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS;
+
+ strncpy(ifr.ifr_name, tun_name, IFNAMSIZ - 1);
+
+ ret = ioctl(fd, TUNSETIFF, &ifr);
+ if (ret)
+ error(1, ret, "ioctl TUNSETIFF");
+
+ return fd;
+}
+
+static void sk_set_filter(int fd)
+{
+ const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt);
+ const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest);
+
+ /* Filter UDP packets with destination port cfg_dst_port */
+ struct sock_filter filter_code[] = {
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_NET_OFF + offset_proto),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, SKF_NET_OFF + offset_dport),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dst_port, 1, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+ };
+
+ struct sock_fprog filter = {
+ sizeof(filter_code) / sizeof(filter_code[0]),
+ filter_code,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)))
+ error(1, errno, "setsockopt attach filter");
+}
+
+static int raw_open(void)
+{
+ int fd;
+
+ fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP);
+ if (fd == -1)
+ error(1, errno, "socket raw (udp)");
+
+ if (cfg_do_filter)
+ sk_set_filter(fd);
+
+ return fd;
+}
+
+static void tun_write(int fd)
+{
+ const char eth_src[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 };
+ const char eth_dst[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
+ struct tun_pi pi = {0};
+ struct ipv6hdr ip6h = {0};
+ struct udphdr uh = {0};
+ struct ethhdr eth = {0};
+ uint32_t payload;
+ struct iovec iov[5];
+ int ret;
+
+ pi.proto = htons(ETH_P_IPV6);
+
+ memcpy(eth.h_source, eth_src, sizeof(eth_src));
+ memcpy(eth.h_dest, eth_dst, sizeof(eth_dst));
+ eth.h_proto = htons(ETH_P_IPV6);
+
+ ip6h.version = 6;
+ ip6h.payload_len = htons(sizeof(uh) + sizeof(uint32_t));
+ ip6h.nexthdr = IPPROTO_UDP;
+ ip6h.hop_limit = 8;
+ if (inet_pton(AF_INET6, "fdab::2", &ip6h.saddr) != 1)
+ error(1, errno, "inet_pton src");
+ if (inet_pton(AF_INET6, "fdab::1", &ip6h.daddr) != 1)
+ error(1, errno, "inet_pton src");
+
+ uh.source = htons(8000);
+ uh.dest = htons(cfg_dst_port);
+ uh.len = ip6h.payload_len;
+ uh.check = 0;
+
+ payload = htonl(0xABABABAB); /* Covered in IPv6 length */
+
+ iov[0].iov_base = &pi;
+ iov[0].iov_len = sizeof(pi);
+ iov[1].iov_base = &eth;
+ iov[1].iov_len = sizeof(eth);
+ iov[2].iov_base = &ip6h;
+ iov[2].iov_len = sizeof(ip6h);
+ iov[3].iov_base = &uh;
+ iov[3].iov_len = sizeof(uh);
+ iov[4].iov_base = &payload;
+ iov[4].iov_len = sizeof(payload);
+
+ ret = writev(fd, iov, sizeof(iov) / sizeof(iov[0]));
+ if (ret <= 0)
+ error(1, errno, "writev");
+}
+
+static void raw_read(int fd)
+{
+ struct timeval tv = { .tv_usec = 100 * 1000 };
+ struct msghdr msg = {0};
+ struct iovec iov[2];
+ struct udphdr uh;
+ uint32_t payload[2];
+ int ret;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+ error(1, errno, "setsockopt rcvtimeo udp");
+
+ iov[0].iov_base = &uh;
+ iov[0].iov_len = sizeof(uh);
+
+ iov[1].iov_base = payload;
+ iov[1].iov_len = sizeof(payload);
+
+ msg.msg_iov = iov;
+ msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]);
+
+ ret = recvmsg(fd, &msg, 0);
+ if (ret <= 0)
+ error(1, errno, "read raw");
+ if (ret != sizeof(uh) + sizeof(payload[0]))
+ error(1, errno, "read raw: len=%d\n", ret);
+
+ fprintf(stderr, "raw recv: 0x%x\n", payload[0]);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "fFi:")) != -1) {
+ switch (c) {
+ case 'f':
+ cfg_do_filter = true;
+ printf("bpf filter enabled\n");
+ break;
+ case 'F':
+ cfg_do_frags = true;
+ printf("napi frags mode enabled\n");
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ default:
+ error(1, 0, "unknown option %c", optopt);
+ break;
+ }
+ }
+
+ if (!cfg_ifname)
+ error(1, 0, "must specify tap interface name (-i)");
+}
+
+int main(int argc, char **argv)
+{
+ int fdt, fdr;
+
+ parse_opts(argc, argv);
+
+ fdr = raw_open();
+ fdt = tun_open(cfg_ifname);
+
+ tun_write(fdt);
+ raw_read(fdr);
+
+ if (close(fdt))
+ error(1, errno, "close tun");
+ if (close(fdr))
+ error(1, errno, "close udp");
+
+ fprintf(stderr, "OK\n");
+ return 0;
+}
+
diff --git a/tools/testing/selftests/net/skf_net_off.sh b/tools/testing/selftests/net/skf_net_off.sh
new file mode 100755
index 000000000000..5da5066fb465
--- /dev/null
+++ b/tools/testing/selftests/net/skf_net_off.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly NS="ns-$(mktemp -u XXXXXX)"
+
+cleanup() {
+ ip netns del $NS
+}
+
+ip netns add $NS
+trap cleanup EXIT
+
+ip -netns $NS link set lo up
+ip -netns $NS tuntap add name tap1 mode tap
+ip -netns $NS link set tap1 up
+ip -netns $NS link set dev tap1 addr 02:00:00:00:00:01
+ip -netns $NS -6 addr add fdab::1 peer fdab::2 dev tap1 nodad
+ip netns exec $NS ethtool -K tap1 gro off
+
+# disable early demux, else udp_v6_early_demux pulls udp header into linear
+ip netns exec $NS sysctl -w net.ipv4.ip_early_demux=0
+
+echo "no filter"
+ip netns exec $NS ./skf_net_off -i tap1
+
+echo "filter, linear skb (-f)"
+ip netns exec $NS ./skf_net_off -i tap1 -f
+
+echo "filter, fragmented skb (-f) (-F)"
+ip netns exec $NS ./skf_net_off -i tap1 -f -F
diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c
index 73b2f2276f3f..2c73bea698a6 100644
--- a/tools/testing/selftests/net/tcp_ao/self-connect.c
+++ b/tools/testing/selftests/net/tcp_ao/self-connect.c
@@ -16,6 +16,9 @@ static void __setup_lo_intf(const char *lo_intf,
if (link_set_up(lo_intf))
test_error("Failed to bring %s up", lo_intf);
+
+ if (ip_route_add(lo_intf, TEST_FAMILY, local_addr, local_addr))
+ test_error("Failed to add a local route %s", lo_intf);
}
static void setup_lo_intf(const char *lo_intf)
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 9a85f93c33d8..5ded3b3a7538 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -1753,6 +1753,42 @@ TEST_F(tls_basic, rekey_tx)
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
}
+TEST_F(tls_basic, disconnect)
+{
+ char const *test_str = "test_message";
+ int send_len = strlen(test_str) + 1;
+ struct tls_crypto_info_keys key;
+ struct sockaddr_in addr;
+ char buf[20];
+ int ret;
+
+ if (self->notls)
+ return;
+
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &key, 0);
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &key, key.len);
+ ASSERT_EQ(ret, 0);
+
+ /* Pre-queue the data so that setsockopt parses it but doesn't
+ * dequeue it from the TCP socket. recvmsg would dequeue.
+ */
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &key, key.len);
+ ASSERT_EQ(ret, 0);
+
+ addr.sin_family = AF_UNSPEC;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr.sin_port = 0;
+ ret = connect(self->cfd, &addr, sizeof(addr));
+ EXPECT_EQ(ret, -1);
+ EXPECT_EQ(errno, EOPNOTSUPP);
+
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+}
+
TEST_F(tls, rekey)
{
char const *test_str_1 = "test_message_before_rekey";
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index c51ea90a1395..815fad8c53a8 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -7,7 +7,7 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
cleanup() {
local -r jobs="$(jobs -p)"
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index 17404f49cdb6..5f3d1a110d11 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -7,7 +7,7 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
cleanup() {
local -r jobs="$(jobs -p)"
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index 550d8eb3e224..f22f6c66997e 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -3,7 +3,7 @@
source net_helper.sh
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
readonly BASE="ns-$(mktemp -u XXXXXX)"
readonly SRC=2
readonly DST=1
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 6bb7dfaa30b6..9709dd067c72 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
readonly STATS="$(mktemp -p /tmp ns-XXXXXX)"
readonly BASE=`basename $STATS`
readonly SRC=2
diff --git a/tools/testing/selftests/net/xdp_dummy.bpf.c b/tools/testing/selftests/net/xdp_dummy.bpf.c
deleted file mode 100644
index d988b2e0cee8..000000000000
--- a/tools/testing/selftests/net/xdp_dummy.bpf.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#define KBUILD_MODNAME "xdp_dummy"
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-SEC("xdp")
-int xdp_dummy_prog(struct xdp_md *ctx)
-{
- return XDP_PASS;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/pcie_bwctrl/Makefile b/tools/testing/selftests/pcie_bwctrl/Makefile
index 48ec048f47af..277f92f9d753 100644
--- a/tools/testing/selftests/pcie_bwctrl/Makefile
+++ b/tools/testing/selftests/pcie_bwctrl/Makefile
@@ -1,2 +1,3 @@
-TEST_PROGS = set_pcie_cooling_state.sh set_pcie_speed.sh
+TEST_PROGS = set_pcie_cooling_state.sh
+TEST_FILES = set_pcie_speed.sh
include ../lib.mk
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index cec22aa11cdf..55bcf81a2b9a 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -32,19 +32,19 @@
#endif
#ifndef __NR_pidfd_open
-#define __NR_pidfd_open -1
+#define __NR_pidfd_open 434
#endif
#ifndef __NR_pidfd_send_signal
-#define __NR_pidfd_send_signal -1
+#define __NR_pidfd_send_signal 424
#endif
#ifndef __NR_clone3
-#define __NR_clone3 -1
+#define __NR_clone3 435
#endif
#ifndef __NR_pidfd_getfd
-#define __NR_pidfd_getfd -1
+#define __NR_pidfd_getfd 438
#endif
#ifndef PIDFD_NONBLOCK
diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c
index a40541bb7c7d..5e96ef785d0d 100644
--- a/tools/testing/selftests/riscv/hwprobe/cbo.c
+++ b/tools/testing/selftests/riscv/hwprobe/cbo.c
@@ -50,6 +50,14 @@ static void cbo_clean(char *base) { cbo_insn(base, 1); }
static void cbo_flush(char *base) { cbo_insn(base, 2); }
static void cbo_zero(char *base) { cbo_insn(base, 4); }
+static void test_no_cbo_inval(void *arg)
+{
+ ksft_print_msg("Testing cbo.inval instruction remain privileged\n");
+ illegal_insn = false;
+ cbo_inval(&mem[0]);
+ ksft_test_result(illegal_insn, "No cbo.inval\n");
+}
+
static void test_no_zicbom(void *arg)
{
ksft_print_msg("Testing Zicbom instructions remain privileged\n");
@@ -61,10 +69,6 @@ static void test_no_zicbom(void *arg)
illegal_insn = false;
cbo_flush(&mem[0]);
ksft_test_result(illegal_insn, "No cbo.flush\n");
-
- illegal_insn = false;
- cbo_inval(&mem[0]);
- ksft_test_result(illegal_insn, "No cbo.inval\n");
}
static void test_no_zicboz(void *arg)
@@ -81,6 +85,30 @@ static bool is_power_of_2(__u64 n)
return n != 0 && (n & (n - 1)) == 0;
}
+static void test_zicbom(void *arg)
+{
+ struct riscv_hwprobe pair = {
+ .key = RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE,
+ };
+ cpu_set_t *cpus = (cpu_set_t *)arg;
+ __u64 block_size;
+ long rc;
+
+ rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)cpus, 0);
+ block_size = pair.value;
+ ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOM_BLOCK_SIZE &&
+ is_power_of_2(block_size), "Zicbom block size\n");
+ ksft_print_msg("Zicbom block size: %llu\n", block_size);
+
+ illegal_insn = false;
+ cbo_clean(&mem[block_size]);
+ ksft_test_result(!illegal_insn, "cbo.clean\n");
+
+ illegal_insn = false;
+ cbo_flush(&mem[block_size]);
+ ksft_test_result(!illegal_insn, "cbo.flush\n");
+}
+
static void test_zicboz(void *arg)
{
struct riscv_hwprobe pair = {
@@ -129,7 +157,7 @@ static void test_zicboz(void *arg)
ksft_test_result_pass("cbo.zero check\n");
}
-static void check_no_zicboz_cpus(cpu_set_t *cpus)
+static void check_no_zicbo_cpus(cpu_set_t *cpus, __u64 cbo)
{
struct riscv_hwprobe pair = {
.key = RISCV_HWPROBE_KEY_IMA_EXT_0,
@@ -137,6 +165,7 @@ static void check_no_zicboz_cpus(cpu_set_t *cpus)
cpu_set_t one_cpu;
int i = 0, c = 0;
long rc;
+ char *cbostr;
while (i++ < CPU_COUNT(cpus)) {
while (!CPU_ISSET(c, cpus))
@@ -148,10 +177,13 @@ static void check_no_zicboz_cpus(cpu_set_t *cpus)
rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&one_cpu, 0);
assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
- if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ)
- ksft_exit_fail_msg("Zicboz is only present on a subset of harts.\n"
- "Use taskset to select a set of harts where Zicboz\n"
- "presence (present or not) is consistent for each hart\n");
+ cbostr = cbo == RISCV_HWPROBE_EXT_ZICBOZ ? "Zicboz" : "Zicbom";
+
+ if (pair.value & cbo)
+ ksft_exit_fail_msg("%s is only present on a subset of harts.\n"
+ "Use taskset to select a set of harts where %s\n"
+ "presence (present or not) is consistent for each hart\n",
+ cbostr, cbostr);
++c;
}
}
@@ -159,7 +191,9 @@ static void check_no_zicboz_cpus(cpu_set_t *cpus)
enum {
TEST_ZICBOZ,
TEST_NO_ZICBOZ,
+ TEST_ZICBOM,
TEST_NO_ZICBOM,
+ TEST_NO_CBO_INVAL,
};
static struct test_info {
@@ -169,7 +203,9 @@ static struct test_info {
} tests[] = {
[TEST_ZICBOZ] = { .nr_tests = 3, test_zicboz },
[TEST_NO_ZICBOZ] = { .nr_tests = 1, test_no_zicboz },
- [TEST_NO_ZICBOM] = { .nr_tests = 3, test_no_zicbom },
+ [TEST_ZICBOM] = { .nr_tests = 3, test_zicbom },
+ [TEST_NO_ZICBOM] = { .nr_tests = 2, test_no_zicbom },
+ [TEST_NO_CBO_INVAL] = { .nr_tests = 1, test_no_cbo_inval },
};
int main(int argc, char **argv)
@@ -189,6 +225,7 @@ int main(int argc, char **argv)
assert(rc == 0);
tests[TEST_NO_ZICBOZ].enabled = true;
tests[TEST_NO_ZICBOM].enabled = true;
+ tests[TEST_NO_CBO_INVAL].enabled = true;
}
rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus);
@@ -206,7 +243,14 @@ int main(int argc, char **argv)
tests[TEST_ZICBOZ].enabled = true;
tests[TEST_NO_ZICBOZ].enabled = false;
} else {
- check_no_zicboz_cpus(&cpus);
+ check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOZ);
+ }
+
+ if (pair.value & RISCV_HWPROBE_EXT_ZICBOM) {
+ tests[TEST_ZICBOM].enabled = true;
+ tests[TEST_NO_ZICBOM].enabled = false;
+ } else {
+ check_no_zicbo_cpus(&cpus, RISCV_HWPROBE_EXT_ZICBOM);
}
for (i = 0; i < ARRAY_SIZE(tests); ++i)
diff --git a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c
index 35c0812e32de..4dde05e45a04 100644
--- a/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c
+++ b/tools/testing/selftests/riscv/vector/v_exec_initval_nolibc.c
@@ -6,7 +6,7 @@
* the values. To further ensure consistency, this file is compiled without
* libc and without auto-vectorization.
*
- * To be "clean" all values must be either all ones or all zeroes.
+ * To be "clean" all values must be all zeroes.
*/
#define __stringify_1(x...) #x
@@ -14,9 +14,8 @@
int main(int argc, char **argv)
{
- char prev_value = 0, value;
+ char value = 0;
unsigned long vl;
- int first = 1;
if (argc > 2 && strcmp(argv[2], "x"))
asm volatile (
@@ -44,14 +43,11 @@ int main(int argc, char **argv)
"vsrl.vi " __stringify(register) ", " __stringify(register) ", 8\n\t" \
".option pop\n\t" \
: "=r" (value)); \
- if (first) { \
- first = 0; \
- } else if (value != prev_value || !(value == 0x00 || value == 0xff)) { \
+ if (value != 0x00) { \
printf("Register " __stringify(register) \
" values not clean! value: %u\n", value); \
exit(-1); \
} \
- prev_value = value; \
} \
})
diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore
index fb2d533aa575..a2afe7994e85 100644
--- a/tools/testing/selftests/rtc/.gitignore
+++ b/tools/testing/selftests/rtc/.gitignore
@@ -1,3 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
rtctest
-setdate
diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile
index 9dbb395c5c79..547c244a2ca5 100644
--- a/tools/testing/selftests/rtc/Makefile
+++ b/tools/testing/selftests/rtc/Makefile
@@ -4,8 +4,6 @@ LDLIBS += -lrt -lpthread -lm
TEST_GEN_PROGS = rtctest
-TEST_GEN_PROGS_EXTENDED = setdate
-
TEST_FILES := settings
include ../lib.mk
diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c
index e103097d0b5b..be175c0e6ae3 100644
--- a/tools/testing/selftests/rtc/rtctest.c
+++ b/tools/testing/selftests/rtc/rtctest.c
@@ -29,6 +29,7 @@ enum rtc_alarm_state {
RTC_ALARM_UNKNOWN,
RTC_ALARM_ENABLED,
RTC_ALARM_DISABLED,
+ RTC_ALARM_RES_MINUTE,
};
FIXTURE(rtc) {
@@ -88,7 +89,7 @@ static void nanosleep_with_retries(long ns)
}
}
-static enum rtc_alarm_state get_rtc_alarm_state(int fd)
+static enum rtc_alarm_state get_rtc_alarm_state(int fd, int need_seconds)
{
struct rtc_param param = { 0 };
int rc;
@@ -103,6 +104,10 @@ static enum rtc_alarm_state get_rtc_alarm_state(int fd)
if ((param.uvalue & _BITUL(RTC_FEATURE_ALARM)) == 0)
return RTC_ALARM_DISABLED;
+ /* Check if alarm has desired granularity */
+ if (need_seconds && (param.uvalue & _BITUL(RTC_FEATURE_ALARM_RES_MINUTE)))
+ return RTC_ALARM_RES_MINUTE;
+
return RTC_ALARM_ENABLED;
}
@@ -227,9 +232,11 @@ TEST_F(rtc, alarm_alm_set) {
SKIP(return, "Skipping test since %s does not exist", rtc_file);
ASSERT_NE(-1, self->fd);
- alarm_state = get_rtc_alarm_state(self->fd);
+ alarm_state = get_rtc_alarm_state(self->fd, 1);
if (alarm_state == RTC_ALARM_DISABLED)
SKIP(return, "Skipping test since alarms are not supported.");
+ if (alarm_state == RTC_ALARM_RES_MINUTE)
+ SKIP(return, "Skipping test since alarms has only minute granularity.");
rc = ioctl(self->fd, RTC_RD_TIME, &tm);
ASSERT_NE(-1, rc);
@@ -295,9 +302,11 @@ TEST_F(rtc, alarm_wkalm_set) {
SKIP(return, "Skipping test since %s does not exist", rtc_file);
ASSERT_NE(-1, self->fd);
- alarm_state = get_rtc_alarm_state(self->fd);
+ alarm_state = get_rtc_alarm_state(self->fd, 1);
if (alarm_state == RTC_ALARM_DISABLED)
SKIP(return, "Skipping test since alarms are not supported.");
+ if (alarm_state == RTC_ALARM_RES_MINUTE)
+ SKIP(return, "Skipping test since alarms has only minute granularity.");
rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time);
ASSERT_NE(-1, rc);
@@ -357,7 +366,7 @@ TEST_F_TIMEOUT(rtc, alarm_alm_set_minute, 65) {
SKIP(return, "Skipping test since %s does not exist", rtc_file);
ASSERT_NE(-1, self->fd);
- alarm_state = get_rtc_alarm_state(self->fd);
+ alarm_state = get_rtc_alarm_state(self->fd, 0);
if (alarm_state == RTC_ALARM_DISABLED)
SKIP(return, "Skipping test since alarms are not supported.");
@@ -425,7 +434,7 @@ TEST_F_TIMEOUT(rtc, alarm_wkalm_set_minute, 65) {
SKIP(return, "Skipping test since %s does not exist", rtc_file);
ASSERT_NE(-1, self->fd);
- alarm_state = get_rtc_alarm_state(self->fd);
+ alarm_state = get_rtc_alarm_state(self->fd, 0);
if (alarm_state == RTC_ALARM_DISABLED)
SKIP(return, "Skipping test since alarms are not supported.");
diff --git a/tools/testing/selftests/rtc/setdate.c b/tools/testing/selftests/rtc/setdate.c
deleted file mode 100644
index b303890b3de2..000000000000
--- a/tools/testing/selftests/rtc/setdate.c
+++ /dev/null
@@ -1,77 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/* Real Time Clock Driver Test
- * by: Benjamin Gaignard (benjamin.gaignard@linaro.org)
- *
- * To build
- * gcc rtctest_setdate.c -o rtctest_setdate
- */
-
-#include <stdio.h>
-#include <linux/rtc.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <errno.h>
-
-static const char default_time[] = "00:00:00";
-
-int main(int argc, char **argv)
-{
- int fd, retval;
- struct rtc_time new, current;
- const char *rtc, *date;
- const char *time = default_time;
-
- switch (argc) {
- case 4:
- time = argv[3];
- /* FALLTHROUGH */
- case 3:
- date = argv[2];
- rtc = argv[1];
- break;
- default:
- fprintf(stderr, "usage: rtctest_setdate <rtcdev> <DD-MM-YYYY> [HH:MM:SS]\n");
- return 1;
- }
-
- fd = open(rtc, O_RDONLY);
- if (fd == -1) {
- perror(rtc);
- exit(errno);
- }
-
- sscanf(date, "%d-%d-%d", &new.tm_mday, &new.tm_mon, &new.tm_year);
- new.tm_mon -= 1;
- new.tm_year -= 1900;
- sscanf(time, "%d:%d:%d", &new.tm_hour, &new.tm_min, &new.tm_sec);
-
- fprintf(stderr, "Test will set RTC date/time to %d-%d-%d, %02d:%02d:%02d.\n",
- new.tm_mday, new.tm_mon + 1, new.tm_year + 1900,
- new.tm_hour, new.tm_min, new.tm_sec);
-
- /* Write the new date in RTC */
- retval = ioctl(fd, RTC_SET_TIME, &new);
- if (retval == -1) {
- perror("RTC_SET_TIME ioctl");
- close(fd);
- exit(errno);
- }
-
- /* Read back */
- retval = ioctl(fd, RTC_RD_TIME, &current);
- if (retval == -1) {
- perror("RTC_RD_TIME ioctl");
- exit(errno);
- }
-
- fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
- current.tm_mday, current.tm_mon + 1, current.tm_year + 1900,
- current.tm_hour, current.tm_min, current.tm_sec);
-
- close(fd);
- return 0;
-}
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
index ee2792998c89..4f21aeb8a3fb 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
@@ -305,7 +305,7 @@
"cmdUnderTest": "$TC actions add action nat ingress default 10.10.10.1 index 12",
"expExitCode": "0",
"verifyCmd": "$TC actions get action nat index 12",
- "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref",
+ "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/0 10.10.10.1 pass.*index 12 ref",
"matchCount": "1",
"teardown": [
"$TC actions flush action nat"
@@ -332,7 +332,7 @@
"cmdUnderTest": "$TC actions add action nat ingress any 10.10.10.1 index 12",
"expExitCode": "0",
"verifyCmd": "$TC actions get action nat index 12",
- "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref",
+ "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/0 10.10.10.1 pass.*index 12 ref",
"matchCount": "1",
"teardown": [
"$TC actions flush action nat"
@@ -359,7 +359,7 @@
"cmdUnderTest": "$TC actions add action nat ingress all 10.10.10.1 index 12",
"expExitCode": "0",
"verifyCmd": "$TC actions get action nat index 12",
- "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref",
+ "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/0 10.10.10.1 pass.*index 12 ref",
"matchCount": "1",
"teardown": [
"$TC actions flush action nat"
@@ -548,7 +548,7 @@
"cmdUnderTest": "$TC actions add action nat egress default 20.20.20.1 pipe index 10",
"expExitCode": "0",
"verifyCmd": "$TC actions get action nat index 10",
- "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref",
+ "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref",
"matchCount": "1",
"teardown": [
"$TC actions flush action nat"
@@ -575,7 +575,7 @@
"cmdUnderTest": "$TC actions add action nat egress any 20.20.20.1 pipe index 10",
"expExitCode": "0",
"verifyCmd": "$TC actions get action nat index 10",
- "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref",
+ "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref",
"matchCount": "1",
"teardown": [
"$TC actions flush action nat"
@@ -602,7 +602,7 @@
"cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10",
"expExitCode": "0",
"verifyCmd": "$TC actions get action nat index 10",
- "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref",
+ "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref",
"matchCount": "1",
"teardown": [
"$TC actions flush action nat"
@@ -629,7 +629,7 @@
"cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10 cookie aa1bc2d3eeff112233445566778800a1",
"expExitCode": "0",
"verifyCmd": "$TC actions get action nat index 10",
- "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref.*cookie aa1bc2d3eeff112233445566778800a1",
+ "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/0 20.20.20.1 pipe.*index 10 ref.*cookie aa1bc2d3eeff112233445566778800a1",
"matchCount": "1",
"teardown": [
"$TC actions flush action nat"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json
index 1ba96c467754..d9fc62ab476c 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json
@@ -412,5 +412,27 @@
"teardown": [
"$TC qdisc del dev $DUMMY ingress"
]
+ },
+ {
+ "id": "33f4",
+ "name": "Check echo of big filter command",
+ "category": [
+ "infra",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY parent root handle 10: fq_codel"
+ ],
+ "cmdUnderTest": "bash -c '$TC -echo filter add dev $DUMMY parent 10: u32 match u32 0 0 $(for i in $(seq 32); do echo action pedit munge ip dport set 22; done) | grep \"added filter\"'",
+ "verifyCmd": "",
+ "expExitCode": "0",
+ "matchCount": "0",
+ "matchPattern": "",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY parent root fq_codel"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index 9044ac054167..a951c0d33cd2 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -126,5 +126,452 @@
"$TC qdisc del dev $DUMMY root handle 1: drr",
"$IP addr del 10.10.10.10/24 dev $DUMMY"
]
- }
+ },
+ {
+ "id": "c024",
+ "name": "Test TBF with SKBPRIO - catch qlen corner cases",
+ "category": [
+ "qdisc",
+ "tbf",
+ "skbprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1: root tbf rate 100bit burst 2000 limit 1000",
+ "$TC qdisc add dev $DUMMY parent 1: handle 10: skbprio limit 1",
+ "ping -c 1 -W 0.1 -Q 0x00 -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "ping -c 1 -W 0.1 -Q 0x1c -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "ping -c 1 -W 0.1 -Q 0x00 -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "ping -c 1 -W 0.1 -Q 0x1c -s 1400 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "sleep 0.5"
+ ],
+ "cmdUnderTest": "$TC -s qdisc show dev $DUMMY",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc skbprio'",
+ "matchPattern": "dropped [1-9][0-9]*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "a4bb",
+ "name": "Test FQ_CODEL with HTB parent - force packet drop with empty queue",
+ "category": [
+ "qdisc",
+ "fq_codel",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1: root htb default 10",
+ "$TC class add dev $DUMMY parent 1: classid 1:10 htb rate 1kbit",
+ "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms",
+ "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10",
+ "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "sleep 0.1"
+ ],
+ "cmdUnderTest": "$TC -s qdisc show dev $DUMMY",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'",
+ "matchPattern": "dropped [1-9][0-9]*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "a4be",
+ "name": "Test FQ_CODEL with QFQ parent - force packet drop with empty queue",
+ "category": [
+ "qdisc",
+ "fq_codel",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1: root qfq",
+ "$TC class add dev $DUMMY parent 1: classid 1:10 qfq weight 1 maxpkt 1000",
+ "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms",
+ "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10",
+ "ping -c 10 -s 1000 -f -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "sleep 0.1"
+ ],
+ "cmdUnderTest": "$TC -s qdisc show dev $DUMMY",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'",
+ "matchPattern": "dropped [1-9][0-9]*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "a4bf",
+ "name": "Test FQ_CODEL with HFSC parent - force packet drop with empty queue",
+ "category": [
+ "qdisc",
+ "fq_codel",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1: root hfsc default 10",
+ "$TC class add dev $DUMMY parent 1: classid 1:10 hfsc sc rate 1kbit ul rate 1kbit",
+ "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms",
+ "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10",
+ "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "sleep 0.1"
+ ],
+ "cmdUnderTest": "$TC -s qdisc show dev $DUMMY",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'",
+ "matchPattern": "dropped [1-9][0-9]*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "a4c0",
+ "name": "Test FQ_CODEL with DRR parent - force packet drop with empty queue",
+ "category": [
+ "qdisc",
+ "fq_codel",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1: root drr",
+ "$TC class add dev $DUMMY parent 1: classid 1:10 drr quantum 1500",
+ "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms",
+ "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10",
+ "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "sleep 0.1"
+ ],
+ "cmdUnderTest": "$TC -s qdisc show dev $DUMMY",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'",
+ "matchPattern": "dropped [1-9][0-9]*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "a4c1",
+ "name": "Test FQ_CODEL with ETS parent - force packet drop with empty queue",
+ "category": [
+ "qdisc",
+ "fq_codel",
+ "ets"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 strict 1",
+ "$TC class change dev $DUMMY parent 1: classid 1:1 ets",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms",
+ "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1",
+ "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "sleep 0.1"
+ ],
+ "cmdUnderTest": "$TC -s qdisc show dev $DUMMY",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'",
+ "matchPattern": "dropped [1-9][0-9]*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "a4c3",
+ "name": "Test HFSC with netem/blackhole - queue emptying during peek operation",
+ "category": [
+ "qdisc",
+ "hfsc",
+ "netem",
+ "blackhole"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root drr",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:2 drr",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 plug limit 1024",
+ "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 hfsc default 1",
+ "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit",
+ "$TC qdisc add dev $DUMMY parent 3:1 handle 4:0 netem delay 1ms",
+ "$TC qdisc add dev $DUMMY parent 4:1 handle 5:0 blackhole",
+ "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 2>&1 || true",
+ "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit",
+ "$TC class del dev $DUMMY parent 3:0 classid 3:1",
+ "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit",
+ "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 2>&1 || true"
+ ],
+ "cmdUnderTest": "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc hfsc 3:.*parent 1:2.*default 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "90ec",
+ "name": "Test DRR's enqueue reentrant behaviour with netem",
+ "category": [
+ "qdisc",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root drr",
+ "$TC class replace dev $DUMMY parent 1:0 classid 1:1 drr",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1"
+ ],
+ "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0",
+ "matchJSON": [
+ {
+ "kind": "drr",
+ "handle": "1:",
+ "bytes": 196,
+ "packets": 2
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "1f1f",
+ "name": "Test ETS's enqueue reentrant behaviour with netem",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root ets bands 2",
+ "$TC class replace dev $DUMMY parent 1:0 classid 1:1 ets quantum 1500",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1"
+ ],
+ "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s class show dev $DUMMY",
+ "matchJSON": [
+ {
+ "class": "ets",
+ "handle": "1:1",
+ "stats": {
+ "bytes": 196,
+ "packets": 2
+ }
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "5e6d",
+ "name": "Test QFQ's enqueue reentrant behaviour with netem",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root qfq",
+ "$TC class replace dev $DUMMY parent 1:0 classid 1:1 qfq weight 100 maxpkt 1500",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1"
+ ],
+ "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0",
+ "matchJSON": [
+ {
+ "kind": "qfq",
+ "handle": "1:",
+ "bytes": 196,
+ "packets": 2
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "bf1d",
+ "name": "Test HFSC's enqueue reentrant behaviour with netem",
+ "category": [
+ "qdisc",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root hfsc",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc ls m2 10Mbit",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit",
+ "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2",
+ "ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true",
+ "$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1",
+ "$TC class del dev $DUMMY classid 1:1"
+ ],
+ "cmdUnderTest": "ping -c 1 10.10.10.2 -I$DUMMY > /dev/null || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0",
+ "matchJSON": [
+ {
+ "kind": "hfsc",
+ "handle": "1:",
+ "bytes": 392,
+ "packets": 4
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "7c3b",
+ "name": "Test nested DRR's enqueue reentrant behaviour with netem",
+ "category": [
+ "qdisc",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root drr",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1",
+ "$TC qdisc add dev $DUMMY handle 2:0 parent 1:1 drr",
+ "$TC class add dev $DUMMY classid 2:1 parent 2:0 drr",
+ "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 2:1",
+ "$TC qdisc add dev $DUMMY parent 2:1 handle 3:0 netem duplicate 100%"
+ ],
+ "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0",
+ "matchJSON": [
+ {
+ "kind": "drr",
+ "handle": "1:",
+ "bytes": 196,
+ "packets": 2
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "62c4",
+ "name": "Test HTB with FQ_CODEL - basic functionality",
+ "category": [
+ "qdisc",
+ "htb",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin",
+ "scapyPlugin"
+ ]
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 root handle 1: htb default 11",
+ "$TC class add dev $DEV1 parent 1: classid 1:1 htb rate 10kbit",
+ "$TC class add dev $DEV1 parent 1:1 classid 1:11 htb rate 10kbit prio 0 quantum 1486",
+ "$TC qdisc add dev $DEV1 parent 1:11 fq_codel quantum 300 noecn",
+ "sleep 0.5"
+ ],
+ "scapy": {
+ "iface": "$DEV0",
+ "count": 5,
+ "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/TCP(sport=12345, dport=80)"
+ },
+ "cmdUnderTest": "$TC -s qdisc show dev $DEV1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DEV1 | grep -A 5 'qdisc fq_codel'",
+ "matchPattern": "Sent [0-9]+ bytes [0-9]+ pkt",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 handle 1: root"
+ ]
+ }
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
index 50e8d72781cb..28c6ce6da7db 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
@@ -228,5 +228,41 @@
"matchCount": "0",
"teardown": [
]
+ },
+ {
+ "id": "7f8f",
+ "name": "Check that a derived limit of 1 is rejected (limit 2 depth 1 flows 1)",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq limit 2 depth 1 flows 1",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "sfq",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "5168",
+ "name": "Check that a derived limit of 1 is rejected (limit 2 depth 1 divisor 1)",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq limit 2 depth 1 divisor 1",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "sfq",
+ "matchCount": "0",
+ "teardown": []
}
]
diff --git a/tools/testing/selftests/tpm2/.gitignore b/tools/testing/selftests/tpm2/.gitignore
new file mode 100644
index 000000000000..6d6165c5e35d
--- /dev/null
+++ b/tools/testing/selftests/tpm2/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+AsyncTest.log
+SpaceTest.log
diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh
index 168f4b166234..3a60e6c6f5c9 100755
--- a/tools/testing/selftests/tpm2/test_smoke.sh
+++ b/tools/testing/selftests/tpm2/test_smoke.sh
@@ -6,6 +6,6 @@ ksft_skip=4
[ -e /dev/tpm0 ] || exit $ksft_skip
read tpm_version < /sys/class/tpm/tpm0/tpm_version_major
-[ "$tpm_version" == 2 ] || exit $ksft_skip
+[ "$tpm_version" = 2 ] || exit $ksft_skip
python3 -m unittest -v tpm2_tests.SmokeTest 2>&1
diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile
index 7817afe29005..f34ac0bac696 100644
--- a/tools/testing/selftests/ublk/Makefile
+++ b/tools/testing/selftests/ublk/Makefile
@@ -4,6 +4,12 @@ CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir)
LDLIBS += -lpthread -lm -luring
TEST_PROGS := test_generic_01.sh
+TEST_PROGS += test_generic_02.sh
+TEST_PROGS += test_generic_03.sh
+TEST_PROGS += test_generic_04.sh
+TEST_PROGS += test_generic_05.sh
+TEST_PROGS += test_generic_06.sh
+TEST_PROGS += test_generic_07.sh
TEST_PROGS += test_null_01.sh
TEST_PROGS += test_null_02.sh
@@ -11,17 +17,24 @@ TEST_PROGS += test_loop_01.sh
TEST_PROGS += test_loop_02.sh
TEST_PROGS += test_loop_03.sh
TEST_PROGS += test_loop_04.sh
+TEST_PROGS += test_loop_05.sh
TEST_PROGS += test_stripe_01.sh
TEST_PROGS += test_stripe_02.sh
+TEST_PROGS += test_stripe_03.sh
+TEST_PROGS += test_stripe_04.sh
TEST_PROGS += test_stress_01.sh
TEST_PROGS += test_stress_02.sh
+TEST_PROGS += test_stress_03.sh
+TEST_PROGS += test_stress_04.sh
+TEST_PROGS += test_stress_05.sh
TEST_GEN_PROGS_EXTENDED = kublk
include ../lib.mk
-$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c
+$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c \
+ fault_inject.c
check:
shellcheck -x -f gcc *.sh
diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c
new file mode 100644
index 000000000000..94a8e729ba4c
--- /dev/null
+++ b/tools/testing/selftests/ublk/fault_inject.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Fault injection ublk target. Hack this up however you like for
+ * testing specific behaviors of ublk_drv. Currently is a null target
+ * with a configurable delay before completing each I/O. This delay can
+ * be used to test ublk_drv's handling of I/O outstanding to the ublk
+ * server when it dies.
+ */
+
+#include "kublk.h"
+
+static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
+ struct ublk_dev *dev)
+{
+ const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+ unsigned long dev_size = 250UL << 30;
+
+ dev->tgt.dev_size = dev_size;
+ dev->tgt.params = (struct ublk_params) {
+ .types = UBLK_PARAM_TYPE_BASIC,
+ .basic = {
+ .logical_bs_shift = 9,
+ .physical_bs_shift = 12,
+ .io_opt_shift = 12,
+ .io_min_shift = 9,
+ .max_sectors = info->max_io_buf_bytes >> 9,
+ .dev_sectors = dev_size >> 9,
+ },
+ };
+
+ dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us * 1000);
+ return 0;
+}
+
+static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag)
+{
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+ struct io_uring_sqe *sqe;
+ struct __kernel_timespec ts = {
+ .tv_nsec = (long long)q->dev->private_data,
+ };
+
+ ublk_queue_alloc_sqes(q, &sqe, 1);
+ io_uring_prep_timeout(sqe, &ts, 1, 0);
+ sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, 1);
+
+ ublk_queued_tgt_io(q, tag, 1);
+
+ return 0;
+}
+
+static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, int tag,
+ const struct io_uring_cqe *cqe)
+{
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+
+ if (cqe->res != -ETIME)
+ ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res);
+
+ if (ublk_completed_tgt_io(q, tag))
+ ublk_complete_io(q, tag, iod->nr_sectors << 9);
+ else
+ ublk_err("%s: io not complete after 1 cqe\n", __func__);
+}
+
+static void ublk_fault_inject_cmd_line(struct dev_ctx *ctx, int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "delay_us", 1, NULL, 0 },
+ { 0, 0, 0, 0 }
+ };
+ int option_idx, opt;
+
+ ctx->fault_inject.delay_us = 0;
+ while ((opt = getopt_long(argc, argv, "",
+ longopts, &option_idx)) != -1) {
+ switch (opt) {
+ case 0:
+ if (!strcmp(longopts[option_idx].name, "delay_us"))
+ ctx->fault_inject.delay_us = strtoll(optarg, NULL, 10);
+ }
+ }
+}
+
+static void ublk_fault_inject_usage(const struct ublk_tgt_ops *ops)
+{
+ printf("\tfault_inject: [--delay_us us (default 0)]\n");
+}
+
+const struct ublk_tgt_ops fault_inject_tgt_ops = {
+ .name = "fault_inject",
+ .init_tgt = ublk_fault_inject_tgt_init,
+ .queue_io = ublk_fault_inject_queue_io,
+ .tgt_io_done = ublk_fault_inject_tgt_io_done,
+ .parse_cmd_line = ublk_fault_inject_cmd_line,
+ .usage = ublk_fault_inject_usage,
+};
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index 05147b53c361..842b40736a9b 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -5,22 +5,24 @@
#include "kublk.h"
+#define MAX_NR_TGT_ARG 64
+
unsigned int ublk_dbg_mask = UBLK_LOG;
static const struct ublk_tgt_ops *tgt_ops_list[] = {
&null_tgt_ops,
&loop_tgt_ops,
&stripe_tgt_ops,
+ &fault_inject_tgt_ops,
};
static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
{
- const struct ublk_tgt_ops *ops;
int i;
if (name == NULL)
return NULL;
- for (i = 0; sizeof(tgt_ops_list) / sizeof(ops); i++)
+ for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++)
if (strcmp(tgt_ops_list[i]->name, name) == 0)
return tgt_ops_list[i];
return NULL;
@@ -99,7 +101,7 @@ static int __ublk_ctrl_cmd(struct ublk_dev *dev,
static int ublk_ctrl_stop_dev(struct ublk_dev *dev)
{
struct ublk_ctrl_cmd_data data = {
- .cmd_op = UBLK_CMD_STOP_DEV,
+ .cmd_op = UBLK_U_CMD_STOP_DEV,
};
return __ublk_ctrl_cmd(dev, &data);
@@ -118,6 +120,27 @@ static int ublk_ctrl_start_dev(struct ublk_dev *dev,
return __ublk_ctrl_cmd(dev, &data);
}
+static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_START_USER_RECOVERY,
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_END_USER_RECOVERY,
+ .flags = CTRL_CMD_HAS_DATA,
+ };
+
+ dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
+
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
static int ublk_ctrl_add_dev(struct ublk_dev *dev)
{
struct ublk_ctrl_cmd_data data = {
@@ -169,7 +192,7 @@ static int ublk_ctrl_get_params(struct ublk_dev *dev,
struct ublk_params *params)
{
struct ublk_ctrl_cmd_data data = {
- .cmd_op = UBLK_CMD_GET_PARAMS,
+ .cmd_op = UBLK_U_CMD_GET_PARAMS,
.flags = CTRL_CMD_HAS_BUF,
.addr = (__u64)params,
.len = sizeof(*params),
@@ -207,15 +230,84 @@ static const char *ublk_dev_state_desc(struct ublk_dev *dev)
};
}
+static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len)
+{
+ unsigned done = 0;
+ int i;
+
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (CPU_ISSET(i, set))
+ done += snprintf(&buf[done], len - done, "%d ", i);
+ }
+}
+
+static void ublk_adjust_affinity(cpu_set_t *set)
+{
+ int j, updated = 0;
+
+ /*
+ * Just keep the 1st CPU now.
+ *
+ * In future, auto affinity selection can be tried.
+ */
+ for (j = 0; j < CPU_SETSIZE; j++) {
+ if (CPU_ISSET(j, set)) {
+ if (!updated) {
+ updated = 1;
+ continue;
+ }
+ CPU_CLR(j, set);
+ }
+ }
+}
+
+/* Caller must free the allocated buffer */
+static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY,
+ .flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF,
+ };
+ cpu_set_t *buf;
+ int i, ret;
+
+ buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues);
+ if (!buf)
+ return -ENOMEM;
+
+ for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) {
+ data.data[0] = i;
+ data.len = sizeof(cpu_set_t);
+ data.addr = (__u64)&buf[i];
+
+ ret = __ublk_ctrl_cmd(ctrl_dev, &data);
+ if (ret < 0) {
+ free(buf);
+ return ret;
+ }
+ ublk_adjust_affinity(&buf[i]);
+ }
+
+ *ptr_buf = buf;
+ return 0;
+}
+
static void ublk_ctrl_dump(struct ublk_dev *dev)
{
struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
struct ublk_params p;
+ cpu_set_t *affinity;
int ret;
ret = ublk_ctrl_get_params(dev, &p);
if (ret < 0) {
- ublk_err("failed to get params %m\n");
+ ublk_err("failed to get params %d %s\n", ret, strerror(-ret));
+ return;
+ }
+
+ ret = ublk_ctrl_get_affinity(dev, &affinity);
+ if (ret < 0) {
+ ublk_err("failed to get affinity %m\n");
return;
}
@@ -225,6 +317,19 @@ static void ublk_ctrl_dump(struct ublk_dev *dev)
ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
info->max_io_buf_bytes, info->ublksrv_pid, info->flags,
ublk_dev_state_desc(dev));
+
+ if (affinity) {
+ char buf[512];
+ int i;
+
+ for (i = 0; i < info->nr_hw_queues; i++) {
+ ublk_print_cpu_set(&affinity[i], buf, sizeof(buf));
+ printf("\tqueue %u: tid %d affinity(%s)\n",
+ i, dev->q[i].tid, buf);
+ }
+ free(affinity);
+ }
+
fflush(stdout);
}
@@ -322,7 +427,7 @@ static int ublk_queue_init(struct ublk_queue *q)
cmd_buf_size = ublk_queue_cmd_buf_sz(q);
off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz();
- q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ,
+ q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ,
MAP_SHARED | MAP_POPULATE, dev->fds[0], off);
if (q->io_cmd_buf == MAP_FAILED) {
ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n",
@@ -347,7 +452,9 @@ static int ublk_queue_init(struct ublk_queue *q)
}
ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
- IORING_SETUP_COOP_TASKRUN);
+ IORING_SETUP_COOP_TASKRUN |
+ IORING_SETUP_SINGLE_ISSUER |
+ IORING_SETUP_DEFER_TASKRUN);
if (ret < 0) {
ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
q->dev->dev_info.dev_id, q->q_id, ret);
@@ -429,12 +536,17 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag)
if (!(io->flags & UBLKSRV_IO_FREE))
return 0;
- /* we issue because we need either fetching or committing */
+ /*
+ * we issue because we need either fetching or committing or
+ * getting data
+ */
if (!(io->flags &
- (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP)))
+ (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA)))
return 0;
- if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
+ if (io->flags & UBLKSRV_NEED_GET_DATA)
+ cmd_op = UBLK_U_IO_NEED_GET_DATA;
+ else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP)
cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ;
else if (io->flags & UBLKSRV_NEED_FETCH_RQ)
cmd_op = UBLK_U_IO_FETCH_REQ;
@@ -551,6 +663,9 @@ static void ublk_handle_cqe(struct io_uring *r,
assert(tag < q->q_depth);
if (q->tgt_ops->queue_io)
q->tgt_ops->queue_io(q, tag);
+ } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
+ io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE;
+ ublk_queue_io_cmd(q, io, tag);
} else {
/*
* COMMIT_REQ will be completed immediately since no fetching
@@ -602,9 +717,24 @@ static int ublk_process_io(struct ublk_queue *q)
return reapped;
}
+static void ublk_queue_set_sched_affinity(const struct ublk_queue *q,
+ cpu_set_t *cpuset)
+{
+ if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0)
+ ublk_err("ublk dev %u queue %u set affinity failed",
+ q->dev->dev_info.dev_id, q->q_id);
+}
+
+struct ublk_queue_info {
+ struct ublk_queue *q;
+ sem_t *queue_sem;
+ cpu_set_t *affinity;
+};
+
static void *ublk_io_handler_fn(void *data)
{
- struct ublk_queue *q = data;
+ struct ublk_queue_info *info = data;
+ struct ublk_queue *q = info->q;
int dev_id = q->dev->dev_info.dev_id;
int ret;
@@ -614,6 +744,10 @@ static void *ublk_io_handler_fn(void *data)
dev_id, q->q_id);
return NULL;
}
+ /* IO perf is sensitive with queue pthread affinity on NUMA machine*/
+ ublk_queue_set_sched_affinity(q, info->affinity);
+ sem_post(info->queue_sem);
+
ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
q->tid, dev_id, q->q_id);
@@ -639,7 +773,7 @@ static void ublk_set_parameters(struct ublk_dev *dev)
dev->dev_info.dev_id, ret);
}
-static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id)
+static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id)
{
uint64_t id;
int evtfd = ctx->_evtfd;
@@ -652,36 +786,68 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id)
else
id = ERROR_EVTFD_DEVID;
+ if (dev && ctx->shadow_dev)
+ memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q));
+
if (write(evtfd, &id, sizeof(id)) != sizeof(id))
return -EINVAL;
+ close(evtfd);
+ shmdt(ctx->shadow_dev);
+
return 0;
}
static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
- int ret, i;
- void *thread_ret;
const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
+ struct ublk_queue_info *qinfo;
+ cpu_set_t *affinity_buf;
+ void *thread_ret;
+ sem_t queue_sem;
+ int ret, i;
ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
+ qinfo = (struct ublk_queue_info *)calloc(sizeof(struct ublk_queue_info),
+ dinfo->nr_hw_queues);
+ if (!qinfo)
+ return -ENOMEM;
+
+ sem_init(&queue_sem, 0, 0);
ret = ublk_dev_prep(ctx, dev);
if (ret)
return ret;
+ ret = ublk_ctrl_get_affinity(dev, &affinity_buf);
+ if (ret)
+ return ret;
+
for (i = 0; i < dinfo->nr_hw_queues; i++) {
dev->q[i].dev = dev;
dev->q[i].q_id = i;
+
+ qinfo[i].q = &dev->q[i];
+ qinfo[i].queue_sem = &queue_sem;
+ qinfo[i].affinity = &affinity_buf[i];
pthread_create(&dev->q[i].thread, NULL,
ublk_io_handler_fn,
- &dev->q[i]);
+ &qinfo[i]);
}
+ for (i = 0; i < dinfo->nr_hw_queues; i++)
+ sem_wait(&queue_sem);
+ free(qinfo);
+ free(affinity_buf);
+
/* everything is fine now, start us */
- ublk_set_parameters(dev);
- ret = ublk_ctrl_start_dev(dev, getpid());
+ if (ctx->recovery)
+ ret = ublk_ctrl_end_user_recovery(dev, getpid());
+ else {
+ ublk_set_parameters(dev);
+ ret = ublk_ctrl_start_dev(dev, getpid());
+ }
if (ret < 0) {
ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
goto fail;
@@ -691,7 +857,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
if (ctx->fg)
ublk_ctrl_dump(dev);
else
- ublk_send_dev_event(ctx, dev->dev_info.dev_id);
+ ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);
/* wait until we are terminated */
for (i = 0; i < dinfo->nr_hw_queues; i++)
@@ -856,7 +1022,10 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
}
}
- ret = ublk_ctrl_add_dev(dev);
+ if (ctx->recovery)
+ ret = ublk_ctrl_start_user_recovery(dev);
+ else
+ ret = ublk_ctrl_add_dev(dev);
if (ret < 0) {
ublk_err("%s: can't add dev id %d, type %s ret %d\n",
__func__, dev_id, tgt_type, ret);
@@ -870,7 +1039,7 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
fail:
if (ret < 0)
- ublk_send_dev_event(ctx, -1);
+ ublk_send_dev_event(ctx, dev, -1);
ublk_ctrl_deinit(dev);
return ret;
}
@@ -884,30 +1053,58 @@ static int cmd_dev_add(struct dev_ctx *ctx)
if (ctx->fg)
goto run;
+ ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666);
+ if (ctx->_shmid < 0) {
+ ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno));
+ exit(-1);
+ }
+ ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0);
+ if (ctx->shadow_dev == (struct ublk_dev *)-1) {
+ ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno));
+ exit(-1);
+ }
ctx->_evtfd = eventfd(0, 0);
if (ctx->_evtfd < 0) {
ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno));
exit(-1);
}
- setsid();
res = fork();
if (res == 0) {
+ int res2;
+
+ setsid();
+ res2 = fork();
+ if (res2 == 0) {
+ /* prepare for detaching */
+ close(STDIN_FILENO);
+ close(STDOUT_FILENO);
+ close(STDERR_FILENO);
run:
- res = __cmd_dev_add(ctx);
- return res;
+ res = __cmd_dev_add(ctx);
+ return res;
+ } else {
+ /* detached from the foreground task */
+ exit(EXIT_SUCCESS);
+ }
} else if (res > 0) {
uint64_t id;
+ int exit_code = EXIT_FAILURE;
res = read(ctx->_evtfd, &id, sizeof(id));
close(ctx->_evtfd);
if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) {
ctx->dev_id = id - 1;
- return __cmd_dev_list(ctx);
+ if (__cmd_dev_list(ctx) >= 0)
+ exit_code = EXIT_SUCCESS;
}
- exit(EXIT_FAILURE);
+ shmdt(ctx->shadow_dev);
+ shmctl(ctx->_shmid, IPC_RMID, NULL);
+ /* wait for child and detach from it */
+ wait(NULL);
+ exit(exit_code);
} else {
- return res;
+ exit(EXIT_FAILURE);
}
}
@@ -969,6 +1166,9 @@ static int __cmd_dev_list(struct dev_ctx *ctx)
ublk_err("%s: can't get dev info from %d: %d\n",
__func__, ctx->dev_id, ret);
} else {
+ if (ctx->shadow_dev)
+ memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q));
+
ublk_ctrl_dump(dev);
}
@@ -1039,14 +1239,47 @@ static int cmd_dev_get_features(void)
return ret;
}
+static void __cmd_create_help(char *exe, bool recovery)
+{
+ int i;
+
+ printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
+ exe, recovery ? "recover" : "add");
+ printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g]\n");
+ printf("\t[-e 0|1 ] [-i 0|1]\n");
+ printf("\t[target options] [backfile1] [backfile2] ...\n");
+ printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
+
+ for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) {
+ const struct ublk_tgt_ops *ops = tgt_ops_list[i];
+
+ if (ops->usage)
+ ops->usage(ops);
+ }
+}
+
+static void cmd_add_help(char *exe)
+{
+ __cmd_create_help(exe, false);
+ printf("\n");
+}
+
+static void cmd_recover_help(char *exe)
+{
+ __cmd_create_help(exe, true);
+ printf("\tPlease provide exact command line for creating this device with real dev_id\n");
+ printf("\n");
+}
+
static int cmd_dev_help(char *exe)
{
- printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe);
- printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n");
+ cmd_add_help(exe);
+ cmd_recover_help(exe);
+
printf("%s del [-n dev_id] -a \n", exe);
- printf("\t -a delete all devices -n delete specified device\n");
+ printf("\t -a delete all devices -n delete specified device\n\n");
printf("%s list [-n dev_id] -a \n", exe);
- printf("\t -a list all devices, -n list specified device, default -a \n");
+ printf("\t -a list all devices, -n list specified device, default -a \n\n");
printf("%s features\n", exe);
return 0;
}
@@ -1063,9 +1296,13 @@ int main(int argc, char *argv[])
{ "quiet", 0, NULL, 0 },
{ "zero_copy", 0, NULL, 'z' },
{ "foreground", 0, NULL, 0 },
- { "chunk_size", 1, NULL, 0 },
+ { "recovery", 1, NULL, 'r' },
+ { "recovery_fail_io", 1, NULL, 'e'},
+ { "recovery_reissue", 1, NULL, 'i'},
+ { "get_data", 1, NULL, 'g'},
{ 0, 0, 0, 0 }
};
+ const struct ublk_tgt_ops *ops = NULL;
int option_idx, opt;
const char *cmd = argv[1];
struct dev_ctx ctx = {
@@ -1073,15 +1310,18 @@ int main(int argc, char *argv[])
.nr_hw_queues = 2,
.dev_id = -1,
.tgt_type = "unknown",
- .chunk_size = 65536, /* def chunk size is 64K */
};
int ret = -EINVAL, i;
+ int tgt_argc = 1;
+ char *tgt_argv[MAX_NR_TGT_ARG] = { NULL };
+ int value;
if (argc == 1)
return ret;
+ opterr = 0;
optind = 2;
- while ((opt = getopt_long(argc, argv, "t:n:d:q:az",
+ while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:gaz",
longopts, &option_idx)) != -1) {
switch (opt) {
case 'a':
@@ -1103,6 +1343,24 @@ int main(int argc, char *argv[])
case 'z':
ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
break;
+ case 'r':
+ value = strtol(optarg, NULL, 10);
+ if (value)
+ ctx.flags |= UBLK_F_USER_RECOVERY;
+ break;
+ case 'e':
+ value = strtol(optarg, NULL, 10);
+ if (value)
+ ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO;
+ break;
+ case 'i':
+ value = strtol(optarg, NULL, 10);
+ if (value)
+ ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE;
+ break;
+ case 'g':
+ ctx.flags |= UBLK_F_NEED_GET_DATA;
+ break;
case 0:
if (!strcmp(longopts[option_idx].name, "debug_mask"))
ublk_dbg_mask = strtol(optarg, NULL, 16);
@@ -1110,8 +1368,26 @@ int main(int argc, char *argv[])
ublk_dbg_mask = 0;
if (!strcmp(longopts[option_idx].name, "foreground"))
ctx.fg = 1;
- if (!strcmp(longopts[option_idx].name, "chunk_size"))
- ctx.chunk_size = strtol(optarg, NULL, 10);
+ break;
+ case '?':
+ /*
+ * target requires every option must have argument
+ */
+ if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') {
+ fprintf(stderr, "every target option requires argument: %s %s\n",
+ argv[optind - 1], argv[optind]);
+ exit(EXIT_FAILURE);
+ }
+
+ if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) {
+ tgt_argv[tgt_argc++] = argv[optind - 1];
+ tgt_argv[tgt_argc++] = argv[optind];
+ } else {
+ fprintf(stderr, "too many target options\n");
+ exit(EXIT_FAILURE);
+ }
+ optind += 1;
+ break;
}
}
@@ -1120,9 +1396,25 @@ int main(int argc, char *argv[])
ctx.files[ctx.nr_files++] = argv[i++];
}
+ ops = ublk_find_tgt(ctx.tgt_type);
+ if (ops && ops->parse_cmd_line) {
+ optind = 0;
+
+ tgt_argv[0] = ctx.tgt_type;
+ ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv);
+ }
+
if (!strcmp(cmd, "add"))
ret = cmd_dev_add(&ctx);
- else if (!strcmp(cmd, "del"))
+ else if (!strcmp(cmd, "recover")) {
+ if (ctx.dev_id < 0) {
+ fprintf(stderr, "device id isn't provided for recovering\n");
+ ret = -EINVAL;
+ } else {
+ ctx.recovery = 1;
+ ret = cmd_dev_add(&ctx);
+ }
+ } else if (!strcmp(cmd, "del"))
ret = cmd_dev_del(&ctx);
else if (!strcmp(cmd, "list")) {
ctx.all = 1;
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index f31a5c4d4143..44ee1e4ac55b 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -20,9 +20,15 @@
#include <sys/wait.h>
#include <sys/eventfd.h>
#include <sys/uio.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <linux/io_uring.h>
#include <liburing.h>
-#include <linux/ublk_cmd.h>
+#include <semaphore.h>
+
+/* allow ublk_dep.h to override ublk_cmd.h */
#include "ublk_dep.h"
+#include <linux/ublk_cmd.h>
#define __maybe_unused __attribute__((unused))
#define MAX_BACK_FILES 4
@@ -30,6 +36,8 @@
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+
/****************** part 1: libublk ********************/
#define CTRL_DEV "/dev/ublk-control"
@@ -42,8 +50,8 @@
#define UBLKSRV_IO_IDLE_SECS 20
#define UBLK_IO_MAX_BYTES (1 << 20)
-#define UBLK_MAX_QUEUES 4
-#define UBLK_QUEUE_DEPTH 128
+#define UBLK_MAX_QUEUES 32
+#define UBLK_QUEUE_DEPTH 1024
#define UBLK_DBG_DEV (1U << 0)
#define UBLK_DBG_QUEUE (1U << 1)
@@ -55,6 +63,16 @@
struct ublk_dev;
struct ublk_queue;
+struct stripe_ctx {
+ /* stripe */
+ unsigned int chunk_size;
+};
+
+struct fault_inject_ctx {
+ /* fault_inject */
+ unsigned long delay_us;
+};
+
struct dev_ctx {
char tgt_type[16];
unsigned long flags;
@@ -66,11 +84,18 @@ struct dev_ctx {
unsigned int logging:1;
unsigned int all:1;
unsigned int fg:1;
-
- /* stripe */
- unsigned int chunk_size;
+ unsigned int recovery:1;
int _evtfd;
+ int _shmid;
+
+ /* built from shmem, only for ublk_dump_dev() */
+ struct ublk_dev *shadow_dev;
+
+ union {
+ struct stripe_ctx stripe;
+ struct fault_inject_ctx fault_inject;
+ };
};
struct ublk_ctrl_cmd_data {
@@ -90,6 +115,7 @@ struct ublk_io {
#define UBLKSRV_NEED_FETCH_RQ (1UL << 0)
#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1)
#define UBLKSRV_IO_FREE (1UL << 2)
+#define UBLKSRV_NEED_GET_DATA (1UL << 3)
unsigned short flags;
unsigned short refs; /* used by target code only */
@@ -107,6 +133,14 @@ struct ublk_tgt_ops {
int (*queue_io)(struct ublk_queue *, int tag);
void (*tgt_io_done)(struct ublk_queue *,
int tag, const struct io_uring_cqe *);
+
+ /*
+ * Target specific command line handling
+ *
+ * each option requires argument for target command line
+ */
+ void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
+ void (*usage)(const struct ublk_tgt_ops *ops);
};
struct ublk_tgt {
@@ -128,7 +162,7 @@ struct ublk_queue {
unsigned int io_inflight;
struct ublk_dev *dev;
const struct ublk_tgt_ops *tgt_ops;
- char *io_cmd_buf;
+ struct ublksrv_io_desc *io_cmd_buf;
struct io_uring ring;
struct ublk_io ios[UBLK_QUEUE_DEPTH];
#define UBLKSRV_QUEUE_STOPPING (1U << 0)
@@ -302,7 +336,7 @@ static inline void ublk_mark_io_done(struct ublk_io *io, int res)
static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)
{
- return (struct ublksrv_io_desc *)&(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
+ return &q->io_cmd_buf[tag];
}
static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
@@ -357,6 +391,7 @@ static inline int ublk_queue_use_zc(const struct ublk_queue *q)
extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
+extern const struct ublk_tgt_ops fault_inject_tgt_ops;
void backing_file_tgt_deinit(struct ublk_dev *dev);
int backing_file_tgt_init(struct ublk_dev *dev);
diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c
index 899875ff50fe..91fec3690d4b 100644
--- a/tools/testing/selftests/ublk/null.c
+++ b/tools/testing/selftests/ublk/null.c
@@ -17,7 +17,8 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
dev->tgt.dev_size = dev_size;
dev->tgt.params = (struct ublk_params) {
- .types = UBLK_PARAM_TYPE_BASIC,
+ .types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN |
+ UBLK_PARAM_TYPE_SEGMENT,
.basic = {
.logical_bs_shift = 9,
.physical_bs_shift = 12,
@@ -26,6 +27,14 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
.max_sectors = info->max_io_buf_bytes >> 9,
.dev_sectors = dev_size >> 9,
},
+ .dma = {
+ .alignment = 4095,
+ },
+ .seg = {
+ .seg_boundary_mask = 4095,
+ .max_segment_size = 32 << 10,
+ .max_segments = 32,
+ },
};
if (info->flags & UBLK_F_SUPPORT_ZERO_COPY)
diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c
index 98c564b12f3c..5dbd6392d83d 100644
--- a/tools/testing/selftests/ublk/stripe.c
+++ b/tools/testing/selftests/ublk/stripe.c
@@ -111,43 +111,67 @@ static void calculate_stripe_array(const struct stripe_conf *conf,
}
}
-static inline enum io_uring_op stripe_to_uring_op(const struct ublksrv_io_desc *iod)
+static inline enum io_uring_op stripe_to_uring_op(
+ const struct ublksrv_io_desc *iod, int zc)
{
unsigned ublk_op = ublksrv_get_op(iod);
if (ublk_op == UBLK_IO_OP_READ)
- return IORING_OP_READV;
+ return zc ? IORING_OP_READV_FIXED : IORING_OP_READV;
else if (ublk_op == UBLK_IO_OP_WRITE)
- return IORING_OP_WRITEV;
+ return zc ? IORING_OP_WRITEV_FIXED : IORING_OP_WRITEV;
assert(0);
}
static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
{
const struct stripe_conf *conf = get_chunk_shift(q);
- enum io_uring_op op = stripe_to_uring_op(iod);
+ int zc = !!(ublk_queue_use_zc(q) != 0);
+ enum io_uring_op op = stripe_to_uring_op(iod, zc);
struct io_uring_sqe *sqe[NR_STRIPE];
struct stripe_array *s = alloc_stripe_array(conf, iod);
struct ublk_io *io = ublk_get_io(q, tag);
- int i;
+ int i, extra = zc ? 2 : 0;
io->private_data = s;
calculate_stripe_array(conf, iod, s);
- ublk_queue_alloc_sqes(q, sqe, s->nr);
- for (i = 0; i < s->nr; i++) {
- struct stripe *t = &s->s[i];
+ ublk_queue_alloc_sqes(q, sqe, s->nr + extra);
+
+ if (zc) {
+ io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag);
+ sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
+ sqe[0]->user_data = build_user_data(tag,
+ ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1);
+ }
+
+ for (i = zc; i < s->nr + extra - zc; i++) {
+ struct stripe *t = &s->s[i - zc];
io_uring_prep_rw(op, sqe[i],
t->seq + 1,
(void *)t->vec,
t->nr_vec,
t->start << 9);
- io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
+ if (zc) {
+ sqe[i]->buf_index = tag;
+ io_uring_sqe_set_flags(sqe[i],
+ IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK);
+ } else {
+ io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
+ }
/* bit63 marks us as tgt io */
- sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i, 1);
+ sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, 1);
+ }
+ if (zc) {
+ struct io_uring_sqe *unreg = sqe[s->nr + 1];
+
+ io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, tag);
+ unreg->user_data = build_user_data(tag, ublk_cmd_op_nr(unreg->cmd_op), 0, 1);
}
- return s->nr;
+
+ /* register buffer is skip_success */
+ return s->nr + zc;
}
static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
@@ -208,19 +232,27 @@ static void ublk_stripe_io_done(struct ublk_queue *q, int tag,
struct ublk_io *io = ublk_get_io(q, tag);
int res = cqe->res;
- if (res < 0) {
+ if (res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) {
if (!io->result)
io->result = res;
- ublk_err("%s: io failure %d tag %u\n", __func__, res, tag);
+ if (res < 0)
+ ublk_err("%s: io failure %d tag %u\n", __func__, res, tag);
}
+ /* buffer register op is IOSQE_CQE_SKIP_SUCCESS */
+ if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
+ io->tgt_ios += 1;
+
/* fail short READ/WRITE simply */
if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) {
unsigned seq = user_data_to_tgt_data(cqe->user_data);
struct stripe_array *s = io->private_data;
- if (res < s->s[seq].vec->iov_len)
+ if (res < s->s[seq].nr_sects << 9) {
io->result = -EIO;
+ ublk_err("%s: short rw op %u res %d exp %u tag %u\n",
+ __func__, op, res, s->s[seq].vec->iov_len, tag);
+ }
}
if (ublk_completed_tgt_io(q, tag)) {
@@ -249,11 +281,11 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
.max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
},
};
- unsigned chunk_size = ctx->chunk_size;
+ unsigned chunk_size = ctx->stripe.chunk_size;
struct stripe_conf *conf;
unsigned chunk_shift;
loff_t bytes = 0;
- int ret, i;
+ int ret, i, mul = 1;
if ((chunk_size & (chunk_size - 1)) || !chunk_size) {
ublk_err("invalid chunk size %u\n", chunk_size);
@@ -295,8 +327,11 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
dev->tgt.dev_size = bytes;
p.basic.dev_sectors = bytes >> 9;
dev->tgt.params = p;
- dev->tgt.sq_depth = dev->dev_info.queue_depth * conf->nr_files;
- dev->tgt.cq_depth = dev->dev_info.queue_depth * conf->nr_files;
+
+ if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY)
+ mul = 2;
+ dev->tgt.sq_depth = mul * dev->dev_info.queue_depth * conf->nr_files;
+ dev->tgt.cq_depth = mul * dev->dev_info.queue_depth * conf->nr_files;
printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files);
@@ -309,10 +344,36 @@ static void ublk_stripe_tgt_deinit(struct ublk_dev *dev)
backing_file_tgt_deinit(dev);
}
+static void ublk_stripe_cmd_line(struct dev_ctx *ctx, int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "chunk_size", 1, NULL, 0 },
+ { 0, 0, 0, 0 }
+ };
+ int option_idx, opt;
+
+ ctx->stripe.chunk_size = 65536;
+ while ((opt = getopt_long(argc, argv, "",
+ longopts, &option_idx)) != -1) {
+ switch (opt) {
+ case 0:
+ if (!strcmp(longopts[option_idx].name, "chunk_size"))
+ ctx->stripe.chunk_size = strtol(optarg, NULL, 10);
+ }
+ }
+}
+
+static void ublk_stripe_usage(const struct ublk_tgt_ops *ops)
+{
+ printf("\tstripe: [--chunk_size chunk_size (default 65536)]\n");
+}
+
const struct ublk_tgt_ops stripe_tgt_ops = {
.name = "stripe",
.init_tgt = ublk_stripe_tgt_init,
.deinit_tgt = ublk_stripe_tgt_deinit,
.queue_io = ublk_stripe_queue_io,
.tgt_io_done = ublk_stripe_io_done,
+ .parse_cmd_line = ublk_stripe_cmd_line,
+ .usage = ublk_stripe_usage,
};
diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh
index 75f54ac6b1c4..a81210ca3e99 100755
--- a/tools/testing/selftests/ublk/test_common.sh
+++ b/tools/testing/selftests/ublk/test_common.sh
@@ -17,25 +17,39 @@ _get_disk_dev_t() {
local minor
dev=/dev/ublkb"${dev_id}"
- major=$(stat -c '%Hr' "$dev")
- minor=$(stat -c '%Lr' "$dev")
+ major="0x"$(stat -c '%t' "$dev")
+ minor="0x"$(stat -c '%T' "$dev")
echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) ))
}
+_run_fio_verify_io() {
+ fio --name=verify --rw=randwrite --direct=1 --ioengine=libaio \
+ --bs=8k --iodepth=32 --verify=crc32c --do_verify=1 \
+ --verify_state_save=0 "$@" > /dev/null
+}
+
_create_backfile() {
- local my_size=$1
- local my_file
+ local index=$1
+ local new_size=$2
+ local old_file
+ local new_file
- my_file=$(mktemp ublk_file_"${my_size}"_XXXXX)
- truncate -s "${my_size}" "${my_file}"
- echo "$my_file"
+ old_file="${UBLK_BACKFILES[$index]}"
+ [ -f "$old_file" ] && rm -f "$old_file"
+
+ new_file=$(mktemp ublk_file_"${new_size}"_XXXXX)
+ truncate -s "${new_size}" "${new_file}"
+ UBLK_BACKFILES["$index"]="$new_file"
}
-_remove_backfile() {
- local file=$1
+_remove_files() {
+ local file
- [ -f "$file" ] && rm -f "$file"
+ for file in "${UBLK_BACKFILES[@]}"; do
+ [ -f "$file" ] && rm -f "$file"
+ done
+ [ -f "$UBLK_TMP" ] && rm -f "$UBLK_TMP"
}
_create_tmp_dir() {
@@ -100,6 +114,7 @@ _prep_test() {
local type=$1
shift 1
modprobe ublk_drv > /dev/null 2>&1
+ UBLK_TMP=$(mktemp ublk_test_XXXXX)
[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*"
}
@@ -123,7 +138,10 @@ _show_result()
echo "$1 : [FAIL]"
fi
fi
- [ "$2" -ne 0 ] && exit "$2"
+ if [ "$2" -ne 0 ]; then
+ _remove_files
+ exit "$2"
+ fi
return 0
}
@@ -132,16 +150,16 @@ _check_add_dev()
{
local tid=$1
local code=$2
- shift 2
+
if [ "${code}" -ne 0 ]; then
- _remove_test_files "$@"
_show_result "${tid}" "${code}"
fi
}
_cleanup_test() {
"${UBLK_PROG}" del -a
- rm -f "$UBLK_TMP"
+
+ _remove_files
}
_have_feature()
@@ -152,9 +170,11 @@ _have_feature()
return 1
}
-_add_ublk_dev() {
- local kublk_temp;
+_create_ublk_dev() {
local dev_id;
+ local cmd=$1
+
+ shift 1
if [ ! -c /dev/ublk-control ]; then
return ${UBLK_SKIP_CODE}
@@ -165,17 +185,34 @@ _add_ublk_dev() {
fi
fi
- kublk_temp=$(mktemp /tmp/kublk-XXXXXX)
- if ! "${UBLK_PROG}" add "$@" > "${kublk_temp}" 2>&1; then
+ if ! dev_id=$("${UBLK_PROG}" "$cmd" "$@" | grep "dev id" | awk -F '[ :]' '{print $3}'); then
echo "fail to add ublk dev $*"
- rm -f "${kublk_temp}"
return 255
fi
-
- dev_id=$(grep "dev id" "${kublk_temp}" | awk -F '[ :]' '{print $3}')
udevadm settle
- rm -f "${kublk_temp}"
- echo "${dev_id}"
+
+ if [[ "$dev_id" =~ ^[0-9]+$ ]]; then
+ echo "${dev_id}"
+ else
+ return 255
+ fi
+}
+
+_add_ublk_dev() {
+ _create_ublk_dev "add" "$@"
+}
+
+_recover_ublk_dev() {
+ local dev_id
+ local state
+
+ dev_id=$(_create_ublk_dev "recover" "$@")
+ for ((j=0;j<20;j++)); do
+ state=$(_get_ublk_dev_state "${dev_id}")
+ [ "$state" == "LIVE" ] && break
+ sleep 1
+ done
+ echo "$state"
}
# kill the ublk daemon and return ublk device state
@@ -214,7 +251,7 @@ __run_io_and_remove()
local kill_server=$3
fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
- --rw=readwrite --iodepth=64 --size="${size}" --numjobs=4 \
+ --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \
--runtime=20 --time_based > /dev/null 2>&1 &
sleep 2
if [ "${kill_server}" = "yes" ]; then
@@ -232,15 +269,80 @@ __run_io_and_remove()
wait
}
+run_io_and_remove()
+{
+ local size=$1
+ local dev_id
+ shift 1
+
+ dev_id=$(_add_ublk_dev "$@")
+ _check_add_dev "$TID" $?
+
+ [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
+ if ! __run_io_and_remove "$dev_id" "${size}" "no"; then
+ echo "/dev/ublkc$dev_id isn't removed"
+ exit 255
+ fi
+}
+
+run_io_and_kill_daemon()
+{
+ local size=$1
+ local dev_id
+ shift 1
+
+ dev_id=$(_add_ublk_dev "$@")
+ _check_add_dev "$TID" $?
+
+ [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)"
+ if ! __run_io_and_remove "$dev_id" "${size}" "yes"; then
+ echo "/dev/ublkc$dev_id isn't removed res ${res}"
+ exit 255
+ fi
+}
+
+run_io_and_recover()
+{
+ local state
+ local dev_id
+
+ dev_id=$(_add_ublk_dev "$@")
+ _check_add_dev "$TID" $?
+
+ fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
+ --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \
+ --runtime=20 --time_based > /dev/null 2>&1 &
+ sleep 4
+
+ state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED")
+ if [ "$state" != "QUIESCED" ]; then
+ echo "device isn't quiesced($state) after killing daemon"
+ return 255
+ fi
+
+ state=$(_recover_ublk_dev -n "$dev_id" "$@")
+ if [ "$state" != "LIVE" ]; then
+ echo "faile to recover to LIVE($state)"
+ return 255
+ fi
+
+ if ! __remove_ublk_dev_return "${dev_id}"; then
+ echo "delete dev ${dev_id} failed"
+ return 255
+ fi
+ wait
+}
+
+
_ublk_test_top_dir()
{
cd "$(dirname "$0")" && pwd
}
-UBLK_TMP=$(mktemp ublk_test_XXXXX)
UBLK_PROG=$(_ublk_test_top_dir)/kublk
UBLK_TEST_QUIET=1
UBLK_TEST_SHOW_RESULT=1
+UBLK_BACKFILES=()
export UBLK_PROG
export UBLK_TEST_QUIET
export UBLK_TEST_SHOW_RESULT
diff --git a/tools/testing/selftests/ublk/test_generic_02.sh b/tools/testing/selftests/ublk/test_generic_02.sh
new file mode 100755
index 000000000000..3e80121e3bf5
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_02.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_02"
+ERR_CODE=0
+
+if ! _have_program bpftrace; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "null" "sequential io order for MQ"
+
+dev_id=$(_add_ublk_dev -t null -q 2)
+_check_add_dev $TID $?
+
+dev_t=$(_get_disk_dev_t "$dev_id")
+bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 &
+btrace_pid=$!
+sleep 2
+
+if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then
+ _cleanup_test "null"
+ exit "$UBLK_SKIP_CODE"
+fi
+
+# run fio over this ublk disk
+fio --name=write_seq \
+ --filename=/dev/ublkb"${dev_id}" \
+ --ioengine=libaio --iodepth=16 \
+ --rw=write \
+ --size=512M \
+ --direct=1 \
+ --bs=4k > /dev/null 2>&1
+ERR_CODE=$?
+kill "$btrace_pid"
+wait
+if grep -q "io_out_of_order" "$UBLK_TMP"; then
+ cat "$UBLK_TMP"
+ ERR_CODE=255
+fi
+_cleanup_test "null"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_03.sh b/tools/testing/selftests/ublk/test_generic_03.sh
new file mode 100755
index 000000000000..b551aa76cb0d
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_03.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_03"
+ERR_CODE=0
+
+_prep_test "null" "check dma & segment limits for zero copy"
+
+dev_id=$(_add_ublk_dev -t null -z)
+_check_add_dev $TID $?
+
+sysfs_path=/sys/block/ublkb"${dev_id}"
+dma_align=$(cat "$sysfs_path"/queue/dma_alignment)
+max_segments=$(cat "$sysfs_path"/queue/max_segments)
+max_segment_size=$(cat "$sysfs_path"/queue/max_segment_size)
+if [ "$dma_align" != "4095" ]; then
+ ERR_CODE=255
+fi
+if [ "$max_segments" != "32" ]; then
+ ERR_CODE=255
+fi
+if [ "$max_segment_size" != "32768" ]; then
+ ERR_CODE=255
+fi
+_cleanup_test "null"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_04.sh b/tools/testing/selftests/ublk/test_generic_04.sh
new file mode 100755
index 000000000000..8a3bc080c577
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_04.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_04"
+ERR_CODE=0
+
+ublk_run_recover_test()
+{
+ run_io_and_recover "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "recover" "basic recover function verification"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_run_recover_test -t null -q 2 -r 1 &
+ublk_run_recover_test -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+ublk_run_recover_test -t null -q 2 -r 1 -i 1 &
+ublk_run_recover_test -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "recover"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh
new file mode 100755
index 000000000000..3bb00a347402
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_05.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_05"
+ERR_CODE=0
+
+ublk_run_recover_test()
+{
+ run_io_and_recover "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+if ! _have_feature "ZERO_COPY"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "recover" "basic recover function verification (zero copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_run_recover_test -t null -q 2 -r 1 -z &
+ublk_run_recover_test -t loop -q 2 -r 1 -z "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+ublk_run_recover_test -t null -q 2 -r 1 -z -i 1 &
+ublk_run_recover_test -t loop -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "recover"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_06.sh b/tools/testing/selftests/ublk/test_generic_06.sh
new file mode 100755
index 000000000000..b67230c42c84
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_06.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_06"
+ERR_CODE=0
+
+_prep_test "fault_inject" "fast cleanup when all I/Os of one hctx are in server"
+
+# configure ublk server to sleep 2s before completing each I/O
+dev_id=$(_add_ublk_dev -t fault_inject -q 2 -d 1 --delay_us 2000000)
+_check_add_dev $TID $?
+
+STARTTIME=${SECONDS}
+
+dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 status=none > /dev/null 2>&1 &
+dd_pid=$!
+
+__ublk_kill_daemon ${dev_id} "DEAD"
+
+wait $dd_pid
+dd_exitcode=$?
+
+ENDTIME=${SECONDS}
+ELAPSED=$(($ENDTIME - $STARTTIME))
+
+# assert that dd sees an error and exits quickly after ublk server is
+# killed. previously this relied on seeing an I/O timeout and so would
+# take ~30s
+if [ $dd_exitcode -eq 0 ]; then
+ echo "dd unexpectedly exited successfully!"
+ ERR_CODE=255
+fi
+if [ $ELAPSED -ge 5 ]; then
+ echo "dd took $ELAPSED seconds to exit (>= 5s tolerance)!"
+ ERR_CODE=255
+fi
+
+_cleanup_test "fault_inject"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_07.sh b/tools/testing/selftests/ublk/test_generic_07.sh
new file mode 100755
index 000000000000..cba86451fa5e
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_07.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_07"
+ERR_CODE=0
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "generic" "test UBLK_F_NEED_GET_DATA"
+
+_create_backfile 0 256M
+dev_id=$(_add_ublk_dev -t loop -q 2 -g "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
+
+# run fio over the ublk disk
+_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
+ERR_CODE=$?
+if [ "$ERR_CODE" -eq 0 ]; then
+ _mkfs_mount_test /dev/ublkb"${dev_id}"
+ ERR_CODE=$?
+fi
+
+_cleanup_test "generic"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_01.sh b/tools/testing/selftests/ublk/test_loop_01.sh
index c882d2a08e13..833fa0dbc700 100755
--- a/tools/testing/selftests/ublk/test_loop_01.sh
+++ b/tools/testing/selftests/ublk/test_loop_01.sh
@@ -6,27 +6,21 @@
TID="loop_01"
ERR_CODE=0
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
_prep_test "loop" "write and verify test"
-backfile_0=$(_create_backfile 256M)
+_create_backfile 0 256M
-dev_id=$(_add_ublk_dev -t loop "$backfile_0")
-_check_add_dev $TID $? "${backfile_0}"
+dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
# run fio over the ublk disk
-fio --name=write_and_verify \
- --filename=/dev/ublkb"${dev_id}" \
- --ioengine=libaio --iodepth=16 \
- --rw=write \
- --size=256M \
- --direct=1 \
- --verify=crc32c \
- --do_verify=1 \
- --bs=4k > /dev/null 2>&1
+_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
ERR_CODE=$?
_cleanup_test "loop"
-_remove_backfile "$backfile_0"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_02.sh b/tools/testing/selftests/ublk/test_loop_02.sh
index 03863d825e07..874568b3646b 100755
--- a/tools/testing/selftests/ublk/test_loop_02.sh
+++ b/tools/testing/selftests/ublk/test_loop_02.sh
@@ -8,15 +8,13 @@ ERR_CODE=0
_prep_test "loop" "mkfs & mount & umount"
-backfile_0=$(_create_backfile 256M)
-dev_id=$(_add_ublk_dev -t loop "$backfile_0")
-_check_add_dev $TID $? "$backfile_0"
+_create_backfile 0 256M
+dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
_cleanup_test "loop"
-_remove_backfile "$backfile_0"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh
index 269c96787d7d..c30f797c6429 100755
--- a/tools/testing/selftests/ublk/test_loop_03.sh
+++ b/tools/testing/selftests/ublk/test_loop_03.sh
@@ -6,26 +6,20 @@
TID="loop_03"
ERR_CODE=0
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
_prep_test "loop" "write and verify over zero copy"
-backfile_0=$(_create_backfile 256M)
-dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")
-_check_add_dev $TID $? "$backfile_0"
+_create_backfile 0 256M
+dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
# run fio over the ublk disk
-fio --name=write_and_verify \
- --filename=/dev/ublkb"${dev_id}" \
- --ioengine=libaio --iodepth=64 \
- --rw=write \
- --size=256M \
- --direct=1 \
- --verify=crc32c \
- --do_verify=1 \
- --bs=4k > /dev/null 2>&1
+_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
ERR_CODE=$?
_cleanup_test "loop"
-_remove_backfile "$backfile_0"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_04.sh b/tools/testing/selftests/ublk/test_loop_04.sh
index 1435422c38ec..b01d75b3214d 100755
--- a/tools/testing/selftests/ublk/test_loop_04.sh
+++ b/tools/testing/selftests/ublk/test_loop_04.sh
@@ -8,15 +8,14 @@ ERR_CODE=0
_prep_test "loop" "mkfs & mount & umount with zero copy"
-backfile_0=$(_create_backfile 256M)
-dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")
-_check_add_dev $TID $? "$backfile_0"
+_create_backfile 0 256M
+
+dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
_cleanup_test "loop"
-_remove_backfile "$backfile_0"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_05.sh b/tools/testing/selftests/ublk/test_loop_05.sh
new file mode 100755
index 000000000000..de2141533074
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_loop_05.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="loop_05"
+ERR_CODE=0
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "loop" "write and verify test"
+
+_create_backfile 0 256M
+
+dev_id=$(_add_ublk_dev -q 2 -t loop "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
+
+# run fio over the ublk disk
+_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
+ERR_CODE=$?
+
+_cleanup_test "loop"
+
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh
index 7177f6c57bc5..7d3150f057d4 100755
--- a/tools/testing/selftests/ublk/test_stress_01.sh
+++ b/tools/testing/selftests/ublk/test_stress_01.sh
@@ -4,44 +4,31 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_01"
ERR_CODE=0
-DEV_ID=-1
ublk_io_and_remove()
{
- local size=$1
- shift 1
- local backfile=""
- if echo "$@" | grep -q "loop"; then
- backfile=${*: -1}
- fi
- DEV_ID=$(_add_ublk_dev "$@")
- _check_add_dev $TID $? "${backfile}"
-
- [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
- if ! __run_io_and_remove "${DEV_ID}" "${size}" "no"; then
- echo "/dev/ublkc${DEV_ID} isn't removed"
- _remove_backfile "${backfile}"
- exit 255
+ run_io_and_remove "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
fi
}
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
_prep_test "stress" "run IO and remove device"
-ublk_io_and_remove 8G -t null
-ERR_CODE=$?
-if [ ${ERR_CODE} -ne 0 ]; then
- _show_result $TID $ERR_CODE
-fi
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
-BACK_FILE=$(_create_backfile 256M)
-ublk_io_and_remove 256M -t loop "${BACK_FILE}"
-ERR_CODE=$?
-if [ ${ERR_CODE} -ne 0 ]; then
- _show_result $TID $ERR_CODE
-fi
+ublk_io_and_remove 8G -t null -q 4 &
+ublk_io_and_remove 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" &
+ublk_io_and_remove 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
-ublk_io_and_remove 256M -t loop -z "${BACK_FILE}"
-ERR_CODE=$?
_cleanup_test "stress"
-_remove_backfile "${BACK_FILE}"
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh
index 2a8e60579a06..1a9065125ae1 100755
--- a/tools/testing/selftests/ublk/test_stress_02.sh
+++ b/tools/testing/selftests/ublk/test_stress_02.sh
@@ -4,44 +4,31 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_02"
ERR_CODE=0
-DEV_ID=-1
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
ublk_io_and_kill_daemon()
{
- local size=$1
- shift 1
- local backfile=""
- if echo "$@" | grep -q "loop"; then
- backfile=${*: -1}
- fi
- DEV_ID=$(_add_ublk_dev "$@")
- _check_add_dev $TID $? "${backfile}"
-
- [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)"
- if ! __run_io_and_remove "${DEV_ID}" "${size}" "yes"; then
- echo "/dev/ublkc${DEV_ID} isn't removed res ${res}"
- _remove_backfile "${backfile}"
- exit 255
+ run_io_and_kill_daemon "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
fi
}
_prep_test "stress" "run IO and kill ublk server"
-ublk_io_and_kill_daemon 8G -t null
-ERR_CODE=$?
-if [ ${ERR_CODE} -ne 0 ]; then
- _show_result $TID $ERR_CODE
-fi
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
-BACK_FILE=$(_create_backfile 256M)
-ublk_io_and_kill_daemon 256M -t loop "${BACK_FILE}"
-ERR_CODE=$?
-if [ ${ERR_CODE} -ne 0 ]; then
- _show_result $TID $ERR_CODE
-fi
+ublk_io_and_kill_daemon 8G -t null -q 4 &
+ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" &
+ublk_io_and_kill_daemon 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
-ublk_io_and_kill_daemon 256M -t loop -z "${BACK_FILE}"
-ERR_CODE=$?
_cleanup_test "stress"
-_remove_backfile "${BACK_FILE}"
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh
new file mode 100755
index 000000000000..e0854f71d35b
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_03.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TID="stress_03"
+ERR_CODE=0
+
+ublk_io_and_remove()
+{
+ run_io_and_remove "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+if ! _have_feature "ZERO_COPY"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "stress" "run IO and remove device(zero copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_io_and_remove 8G -t null -q 4 -z &
+ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
+ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "stress"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh
new file mode 100755
index 000000000000..1798a98387e8
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_04.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TID="stress_04"
+ERR_CODE=0
+
+ublk_io_and_kill_daemon()
+{
+ run_io_and_kill_daemon "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+if ! _have_feature "ZERO_COPY"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "stress" "run IO and kill ublk server(zero copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_io_and_kill_daemon 8G -t null -q 4 -z &
+ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
+ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "stress"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh
new file mode 100755
index 000000000000..88601b48f1cd
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_05.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TID="stress_05"
+ERR_CODE=0
+
+run_io_and_remove()
+{
+ local size=$1
+ local dev_id
+ local dev_pid
+ shift 1
+
+ dev_id=$(_add_ublk_dev "$@")
+ _check_add_dev $TID $?
+
+ [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
+
+ fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
+ --rw=readwrite --iodepth=128 --size="${size}" --numjobs=4 \
+ --runtime=40 --time_based > /dev/null 2>&1 &
+ sleep 4
+
+ dev_pid=$(_get_ublk_daemon_pid "$dev_id")
+ kill -9 "$dev_pid"
+
+ if ! __remove_ublk_dev_return "${dev_id}"; then
+ echo "delete dev ${dev_id} failed"
+ return 255
+ fi
+}
+
+ublk_io_and_remove()
+{
+ run_io_and_remove "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+_prep_test "stress" "run IO and remove device with recovery enabled"
+
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+for reissue in $(seq 0 1); do
+ ublk_io_and_remove 8G -t null -q 4 -g -r 1 -i "$reissue" &
+ ublk_io_and_remove 256M -t loop -q 4 -g -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" &
+ wait
+done
+
+if _have_feature "ZERO_COPY"; then
+ for reissue in $(seq 0 1); do
+ ublk_io_and_remove 8G -t null -q 4 -g -z -r 1 -i "$reissue" &
+ ublk_io_and_remove 256M -t loop -q 4 -g -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
+ wait
+ done
+fi
+
+_cleanup_test "stress"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh
index c01f3dc325ab..4e4f0fdf3c9b 100755
--- a/tools/testing/selftests/ublk/test_stripe_01.sh
+++ b/tools/testing/selftests/ublk/test_stripe_01.sh
@@ -6,29 +6,21 @@
TID="stripe_01"
ERR_CODE=0
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
_prep_test "stripe" "write and verify test"
-backfile_0=$(_create_backfile 256M)
-backfile_1=$(_create_backfile 256M)
+_create_backfile 0 256M
+_create_backfile 1 256M
-dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")
-_check_add_dev $TID $? "${backfile_0}"
+dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
# run fio over the ublk disk
-fio --name=write_and_verify \
- --filename=/dev/ublkb"${dev_id}" \
- --ioengine=libaio --iodepth=32 \
- --rw=write \
- --size=512M \
- --direct=1 \
- --verify=crc32c \
- --do_verify=1 \
- --bs=4k > /dev/null 2>&1
+_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M
ERR_CODE=$?
_cleanup_test "stripe"
-
-_remove_backfile "$backfile_0"
-_remove_backfile "$backfile_1"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_02.sh b/tools/testing/selftests/ublk/test_stripe_02.sh
index e8a45fa82dde..5820ab2efba4 100755
--- a/tools/testing/selftests/ublk/test_stripe_02.sh
+++ b/tools/testing/selftests/ublk/test_stripe_02.sh
@@ -8,17 +8,14 @@ ERR_CODE=0
_prep_test "stripe" "mkfs & mount & umount"
-backfile_0=$(_create_backfile 256M)
-backfile_1=$(_create_backfile 256M)
-dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")
-_check_add_dev $TID $? "$backfile_0" "$backfile_1"
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
_cleanup_test "stripe"
-
-_remove_backfile "$backfile_0"
-_remove_backfile "$backfile_1"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_03.sh b/tools/testing/selftests/ublk/test_stripe_03.sh
new file mode 100755
index 000000000000..20b977e27814
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stripe_03.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="stripe_03"
+ERR_CODE=0
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "stripe" "write and verify test"
+
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+dev_id=$(_add_ublk_dev -q 2 -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
+
+# run fio over the ublk disk
+_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M
+ERR_CODE=$?
+
+_cleanup_test "stripe"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_04.sh b/tools/testing/selftests/ublk/test_stripe_04.sh
new file mode 100755
index 000000000000..1b51ed2f1d84
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stripe_04.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="stripe_04"
+ERR_CODE=0
+
+_prep_test "stripe" "mkfs & mount & umount on zero copy"
+
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+dev_id=$(_add_ublk_dev -t stripe -z -q 2 "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
+
+_mkfs_mount_test /dev/ublkb"${dev_id}"
+ERR_CODE=$?
+
+_cleanup_test "stripe"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 28422c32cc8f..f703fcfe9f7c 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -19,7 +19,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
- corrupt_xstate_header amx lam test_shadow_stack avx
+ corrupt_xstate_header amx lam test_shadow_stack avx apx
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
@@ -136,3 +136,4 @@ $(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack
$(OUTPUT)/avx_64: CFLAGS += -mno-avx -mno-avx512f
$(OUTPUT)/amx_64: EXTRA_FILES += xstate.c
$(OUTPUT)/avx_64: EXTRA_FILES += xstate.c
+$(OUTPUT)/apx_64: EXTRA_FILES += xstate.c
diff --git a/tools/testing/selftests/x86/apx.c b/tools/testing/selftests/x86/apx.c
new file mode 100644
index 000000000000..d9c8d41b8c5a
--- /dev/null
+++ b/tools/testing/selftests/x86/apx.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include "xstate.h"
+
+int main(void)
+{
+ test_xstate(XFEATURE_APX);
+}
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
index 18d736640ece..0873b0e5f48b 100644
--- a/tools/testing/selftests/x86/lam.c
+++ b/tools/testing/selftests/x86/lam.c
@@ -682,7 +682,7 @@ int do_uring(unsigned long lam)
return 1;
if (fstat(file_fd, &st) < 0)
- return 1;
+ goto cleanup;
off_t file_sz = st.st_size;
@@ -690,7 +690,7 @@ int do_uring(unsigned long lam)
fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks);
if (!fi)
- return 1;
+ goto cleanup;
fi->file_sz = file_sz;
fi->file_fd = file_fd;
@@ -698,7 +698,7 @@ int do_uring(unsigned long lam)
ring = malloc(sizeof(*ring));
if (!ring) {
free(fi);
- return 1;
+ goto cleanup;
}
memset(ring, 0, sizeof(struct io_ring));
@@ -729,6 +729,8 @@ out:
}
free(fi);
+cleanup:
+ close(file_fd);
return ret;
}
@@ -1189,6 +1191,7 @@ void *allocate_dsa_pasid(void)
wq = mmap(NULL, 0x1000, PROT_WRITE,
MAP_SHARED | MAP_POPULATE, fd, 0);
+ close(fd);
if (wq == MAP_FAILED)
perror("mmap");
diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
index d53959e03593..94bee6e0c813 100644
--- a/tools/testing/selftests/x86/test_mremap_vdso.c
+++ b/tools/testing/selftests/x86/test_mremap_vdso.c
@@ -14,6 +14,7 @@
#include <errno.h>
#include <unistd.h>
#include <string.h>
+#include <stdbool.h>
#include <sys/mman.h>
#include <sys/auxv.h>
@@ -55,13 +56,55 @@ static int try_to_remap(void *vdso_addr, unsigned long size)
}
+#define VDSO_NAME "[vdso]"
+#define VMFLAGS "VmFlags:"
+#define MSEAL_FLAGS "sl"
+#define MAX_LINE_LEN 512
+
+bool vdso_sealed(FILE *maps)
+{
+ char line[MAX_LINE_LEN];
+ bool has_vdso = false;
+
+ while (fgets(line, sizeof(line), maps)) {
+ if (strstr(line, VDSO_NAME))
+ has_vdso = true;
+
+ if (has_vdso && !strncmp(line, VMFLAGS, strlen(VMFLAGS))) {
+ if (strstr(line, MSEAL_FLAGS))
+ return true;
+
+ return false;
+ }
+ }
+
+ return false;
+}
+
int main(int argc, char **argv, char **envp)
{
pid_t child;
+ FILE *maps;
ksft_print_header();
ksft_set_plan(1);
+ maps = fopen("/proc/self/smaps", "r");
+ if (!maps) {
+ ksft_test_result_skip(
+ "Could not open /proc/self/smaps, errno=%d\n",
+ errno);
+
+ return 0;
+ }
+
+ if (vdso_sealed(maps)) {
+ ksft_test_result_skip("vdso is sealed\n");
+ return 0;
+ }
+
+ fclose(maps);
+
child = fork();
if (child == -1)
ksft_exit_fail_msg("failed to fork (%d): %m\n", errno);
diff --git a/tools/testing/selftests/x86/xstate.c b/tools/testing/selftests/x86/xstate.c
index 23c1d6c964ea..97fe4bd8bc77 100644
--- a/tools/testing/selftests/x86/xstate.c
+++ b/tools/testing/selftests/x86/xstate.c
@@ -31,7 +31,8 @@
(1 << XFEATURE_OPMASK) | \
(1 << XFEATURE_ZMM_Hi256) | \
(1 << XFEATURE_Hi16_ZMM) | \
- (1 << XFEATURE_XTILEDATA))
+ (1 << XFEATURE_XTILEDATA) | \
+ (1 << XFEATURE_APX))
static inline uint64_t xgetbv(uint32_t index)
{
diff --git a/tools/testing/selftests/x86/xstate.h b/tools/testing/selftests/x86/xstate.h
index 42af36ec852f..e91e3092b5d2 100644
--- a/tools/testing/selftests/x86/xstate.h
+++ b/tools/testing/selftests/x86/xstate.h
@@ -33,6 +33,7 @@ enum xfeature {
XFEATURE_RSRVD_COMP_16,
XFEATURE_XTILECFG,
XFEATURE_XTILEDATA,
+ XFEATURE_APX,
XFEATURE_MAX,
};
@@ -59,6 +60,7 @@ static const char *xfeature_names[] =
"unknown xstate feature",
"AMX Tile config",
"AMX Tile data",
+ "APX registers",
"unknown xstate feature",
};
diff --git a/tools/testing/shared/interval_tree-shim.c b/tools/testing/shared/interval_tree-shim.c
new file mode 100644
index 000000000000..122e74756571
--- /dev/null
+++ b/tools/testing/shared/interval_tree-shim.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Very simple shim around the interval tree. */
+
+#include "../../../lib/interval_tree.c"
diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c
index 66dbb362385f..0f97fb0d19e1 100644
--- a/tools/testing/shared/linux.c
+++ b/tools/testing/shared/linux.c
@@ -150,7 +150,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list)
{
if (kmalloc_verbose)
- pr_debug("Bulk free %p[0-%lu]\n", list, size - 1);
+ pr_debug("Bulk free %p[0-%zu]\n", list, size - 1);
pthread_mutex_lock(&cachep->lock);
for (int i = 0; i < size; i++)
@@ -168,7 +168,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
size_t i;
if (kmalloc_verbose)
- pr_debug("Bulk alloc %lu\n", size);
+ pr_debug("Bulk alloc %zu\n", size);
pthread_mutex_lock(&cachep->lock);
if (cachep->nr_objs >= size) {
diff --git a/tools/testing/shared/linux/cleanup.h b/tools/testing/shared/linux/cleanup.h
new file mode 100644
index 000000000000..ea3081426ee9
--- /dev/null
+++ b/tools/testing/shared/linux/cleanup.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "../../../../include/linux/cleanup.h"
diff --git a/tools/testing/shared/linux/interval_tree.h b/tools/testing/shared/linux/interval_tree.h
new file mode 100644
index 000000000000..129faf9f1d0a
--- /dev/null
+++ b/tools/testing/shared/linux/interval_tree.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TEST_INTERVAL_TREE_H
+#define _TEST_INTERVAL_TREE_H
+
+#include "../../../../include/linux/interval_tree.h"
+
+#endif /* _TEST_INTERVAL_TREE_H */
diff --git a/tools/testing/shared/linux/interval_tree_generic.h b/tools/testing/shared/linux/interval_tree_generic.h
new file mode 100644
index 000000000000..34cd654bee61
--- /dev/null
+++ b/tools/testing/shared/linux/interval_tree_generic.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "../../../../include/linux/interval_tree_generic.h"
diff --git a/tools/testing/shared/linux/rbtree.h b/tools/testing/shared/linux/rbtree.h
new file mode 100644
index 000000000000..d644bb7360bf
--- /dev/null
+++ b/tools/testing/shared/linux/rbtree.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TEST_RBTREE_H
+#define _TEST_RBTREE_H
+
+#include <linux/kernel.h>
+#include "../../../../include/linux/rbtree.h"
+
+#endif /* _TEST_RBTREE_H */
diff --git a/tools/testing/shared/linux/rbtree_augmented.h b/tools/testing/shared/linux/rbtree_augmented.h
new file mode 100644
index 000000000000..ad138fcf6652
--- /dev/null
+++ b/tools/testing/shared/linux/rbtree_augmented.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TEST_RBTREE_AUGMENTED_H
+#define _TEST_RBTREE_AUGMENTED_H
+
+#include "../../../../include/linux/rbtree_augmented.h"
+
+#endif /* _TEST_RBTREE_AUGMENTED_H */
diff --git a/tools/testing/shared/linux/rbtree_types.h b/tools/testing/shared/linux/rbtree_types.h
new file mode 100644
index 000000000000..194194a5bf92
--- /dev/null
+++ b/tools/testing/shared/linux/rbtree_types.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TEST_RBTREE_TYPES_H
+#define _TEST_RBTREE_TYPES_H
+
+#include "../../../../include/linux/rbtree_types.h"
+
+#endif /* _TEST_RBTREE_TYPES_H */
+
diff --git a/tools/testing/shared/rbtree-shim.c b/tools/testing/shared/rbtree-shim.c
new file mode 100644
index 000000000000..7692a993e5f1
--- /dev/null
+++ b/tools/testing/shared/rbtree-shim.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Very simple shim around the rbtree. */
+
+#include "../../../lib/rbtree.c"
+
diff --git a/tools/testing/vma/linux/atomic.h b/tools/testing/vma/linux/atomic.h
index 3e1b6adc027b..788c597c4fde 100644
--- a/tools/testing/vma/linux/atomic.h
+++ b/tools/testing/vma/linux/atomic.h
@@ -9,4 +9,9 @@
#define atomic_set(x, y) uatomic_set(x, y)
#define U8_MAX UCHAR_MAX
+#ifndef atomic_cmpxchg_relaxed
+#define atomic_cmpxchg_relaxed uatomic_cmpxchg
+#define atomic_cmpxchg_release uatomic_cmpxchg
+#endif /* atomic_cmpxchg_relaxed */
+
#endif /* _LINUX_ATOMIC_H */
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
index 04ab45e27fb8..11f761769b5b 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/vma.c
@@ -74,11 +74,23 @@ static struct vm_area_struct *alloc_vma(struct mm_struct *mm,
ret->vm_end = end;
ret->vm_pgoff = pgoff;
ret->__vm_flags = flags;
+ vma_assert_detached(ret);
return ret;
}
/* Helper function to allocate a VMA and link it to the tree. */
+static int attach_vma(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+ int res;
+
+ res = vma_link(mm, vma);
+ if (!res)
+ vma_assert_attached(vma);
+ return res;
+}
+
+/* Helper function to allocate a VMA and link it to the tree. */
static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
unsigned long start,
unsigned long end,
@@ -90,7 +102,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
if (vma == NULL)
return NULL;
- if (vma_link(mm, vma)) {
+ if (attach_vma(mm, vma)) {
vm_area_free(vma);
return NULL;
}
@@ -108,6 +120,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
/* Helper function which provides a wrapper around a merge new VMA operation. */
static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg)
{
+ struct vm_area_struct *vma;
/*
* For convenience, get prev and next VMAs. Which the new VMA operation
* requires.
@@ -116,7 +129,11 @@ static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg)
vmg->prev = vma_prev(vmg->vmi);
vma_iter_next_range(vmg->vmi);
- return vma_merge_new_range(vmg);
+ vma = vma_merge_new_range(vmg);
+ if (vma)
+ vma_assert_attached(vma);
+
+ return vma;
}
/*
@@ -125,7 +142,12 @@ static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg)
*/
static struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg)
{
- return vma_merge_existing_range(vmg);
+ struct vm_area_struct *vma;
+
+ vma = vma_merge_existing_range(vmg);
+ if (vma)
+ vma_assert_attached(vma);
+ return vma;
}
/*
@@ -147,13 +169,20 @@ static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start,
vma_iter_set(vmg->vmi, start);
vmg->prev = NULL;
+ vmg->middle = NULL;
vmg->next = NULL;
- vmg->vma = NULL;
+ vmg->target = NULL;
vmg->start = start;
vmg->end = end;
vmg->pgoff = pgoff;
vmg->flags = flags;
+
+ vmg->just_expand = false;
+ vmg->__remove_middle = false;
+ vmg->__remove_next = false;
+ vmg->__adjust_middle_start = false;
+ vmg->__adjust_next_start = false;
}
/*
@@ -253,8 +282,8 @@ static bool test_simple_merge(void)
.pgoff = 1,
};
- ASSERT_FALSE(vma_link(&mm, vma_left));
- ASSERT_FALSE(vma_link(&mm, vma_right));
+ ASSERT_FALSE(attach_vma(&mm, vma_left));
+ ASSERT_FALSE(attach_vma(&mm, vma_right));
vma = merge_new(&vmg);
ASSERT_NE(vma, NULL);
@@ -278,7 +307,7 @@ static bool test_simple_modify(void)
struct vm_area_struct *init_vma = alloc_vma(&mm, 0, 0x3000, 0, flags);
VMA_ITERATOR(vmi, &mm, 0x1000);
- ASSERT_FALSE(vma_link(&mm, init_vma));
+ ASSERT_FALSE(attach_vma(&mm, init_vma));
/*
* The flags will not be changed, the vma_modify_flags() function
@@ -338,13 +367,13 @@ static bool test_simple_expand(void)
VMA_ITERATOR(vmi, &mm, 0);
struct vma_merge_struct vmg = {
.vmi = &vmi,
- .vma = vma,
+ .middle = vma,
.start = 0,
.end = 0x3000,
.pgoff = 0,
};
- ASSERT_FALSE(vma_link(&mm, vma));
+ ASSERT_FALSE(attach_vma(&mm, vma));
ASSERT_FALSE(expand_existing(&vmg));
@@ -365,7 +394,7 @@ static bool test_simple_shrink(void)
struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x3000, 0, flags);
VMA_ITERATOR(vmi, &mm, 0);
- ASSERT_FALSE(vma_link(&mm, vma));
+ ASSERT_FALSE(attach_vma(&mm, vma));
ASSERT_FALSE(vma_shrink(&vmi, vma, 0, 0x1000, 0));
@@ -631,7 +660,7 @@ static bool test_vma_merge_special_flags(void)
*/
vma = alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, flags);
ASSERT_NE(vma, NULL);
- vmg.vma = vma;
+ vmg.middle = vma;
for (i = 0; i < ARRAY_SIZE(special_flags); i++) {
vm_flags_t special_flag = special_flags[i];
@@ -760,7 +789,7 @@ static bool test_vma_merge_with_close(void)
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma_prev;
- vmg.vma = vma;
+ vmg.middle = vma;
/*
* The VMA being modified in a way that would otherwise merge should
@@ -787,7 +816,7 @@ static bool test_vma_merge_with_close(void)
vma->vm_ops = &vm_ops;
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
- vmg.vma = vma;
+ vmg.middle = vma;
ASSERT_EQ(merge_existing(&vmg), NULL);
/*
* Initially this is misapprehended as an out of memory report, as the
@@ -817,7 +846,7 @@ static bool test_vma_merge_with_close(void)
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma_prev;
- vmg.vma = vma;
+ vmg.middle = vma;
ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
@@ -843,7 +872,7 @@ static bool test_vma_merge_with_close(void)
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma_prev;
- vmg.vma = vma;
+ vmg.middle = vma;
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
@@ -940,7 +969,7 @@ static bool test_merge_existing(void)
vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags);
vma_next->vm_ops = &vm_ops; /* This should have no impact. */
vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags);
- vmg.vma = vma;
+ vmg.middle = vma;
vmg.prev = vma;
vma->anon_vma = &dummy_anon_vma;
ASSERT_EQ(merge_existing(&vmg), vma_next);
@@ -973,7 +1002,7 @@ static bool test_merge_existing(void)
vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags);
vma_next->vm_ops = &vm_ops; /* This should have no impact. */
vmg_set_range(&vmg, 0x2000, 0x6000, 2, flags);
- vmg.vma = vma;
+ vmg.middle = vma;
vma->anon_vma = &dummy_anon_vma;
ASSERT_EQ(merge_existing(&vmg), vma_next);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
@@ -1003,7 +1032,7 @@ static bool test_merge_existing(void)
vma->vm_ops = &vm_ops; /* This should have no impact. */
vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags);
vmg.prev = vma_prev;
- vmg.vma = vma;
+ vmg.middle = vma;
vma->anon_vma = &dummy_anon_vma;
ASSERT_EQ(merge_existing(&vmg), vma_prev);
@@ -1037,7 +1066,7 @@ static bool test_merge_existing(void)
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags);
vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
vmg.prev = vma_prev;
- vmg.vma = vma;
+ vmg.middle = vma;
vma->anon_vma = &dummy_anon_vma;
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
@@ -1067,7 +1096,7 @@ static bool test_merge_existing(void)
vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags);
vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
vmg.prev = vma_prev;
- vmg.vma = vma;
+ vmg.middle = vma;
vma->anon_vma = &dummy_anon_vma;
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
@@ -1102,37 +1131,37 @@ static bool test_merge_existing(void)
vmg_set_range(&vmg, 0x4000, 0x5000, 4, flags);
vmg.prev = vma;
- vmg.vma = vma;
+ vmg.middle = vma;
ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags);
vmg.prev = vma;
- vmg.vma = vma;
+ vmg.middle = vma;
ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
vmg_set_range(&vmg, 0x6000, 0x7000, 6, flags);
vmg.prev = vma;
- vmg.vma = vma;
+ vmg.middle = vma;
ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
vmg_set_range(&vmg, 0x4000, 0x7000, 4, flags);
vmg.prev = vma;
- vmg.vma = vma;
+ vmg.middle = vma;
ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
vmg_set_range(&vmg, 0x4000, 0x6000, 4, flags);
vmg.prev = vma;
- vmg.vma = vma;
+ vmg.middle = vma;
ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags);
vmg.prev = vma;
- vmg.vma = vma;
+ vmg.middle = vma;
ASSERT_EQ(merge_existing(&vmg), NULL);
ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE);
@@ -1197,7 +1226,7 @@ static bool test_anon_vma_non_mergeable(void)
vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
vmg.prev = vma_prev;
- vmg.vma = vma;
+ vmg.middle = vma;
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
@@ -1277,7 +1306,7 @@ static bool test_dup_anon_vma(void)
vma_next->anon_vma = &dummy_anon_vma;
vmg_set_range(&vmg, 0, 0x5000, 0, flags);
- vmg.vma = vma_prev;
+ vmg.middle = vma_prev;
vmg.next = vma_next;
ASSERT_EQ(expand_existing(&vmg), 0);
@@ -1309,7 +1338,7 @@ static bool test_dup_anon_vma(void)
vma_next->anon_vma = &dummy_anon_vma;
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma_prev;
- vmg.vma = vma;
+ vmg.middle = vma;
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
@@ -1338,7 +1367,7 @@ static bool test_dup_anon_vma(void)
vma->anon_vma = &dummy_anon_vma;
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma_prev;
- vmg.vma = vma;
+ vmg.middle = vma;
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
@@ -1366,7 +1395,7 @@ static bool test_dup_anon_vma(void)
vma->anon_vma = &dummy_anon_vma;
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma_prev;
- vmg.vma = vma;
+ vmg.middle = vma;
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
@@ -1394,7 +1423,7 @@ static bool test_dup_anon_vma(void)
vma->anon_vma = &dummy_anon_vma;
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma;
- vmg.vma = vma;
+ vmg.middle = vma;
ASSERT_EQ(merge_existing(&vmg), vma_next);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
@@ -1432,7 +1461,7 @@ static bool test_vmi_prealloc_fail(void)
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma_prev;
- vmg.vma = vma;
+ vmg.middle = vma;
fail_prealloc = true;
@@ -1458,7 +1487,7 @@ static bool test_vmi_prealloc_fail(void)
vma->anon_vma = &dummy_anon_vma;
vmg_set_range(&vmg, 0, 0x5000, 3, flags);
- vmg.vma = vma_prev;
+ vmg.middle = vma_prev;
vmg.next = vma;
fail_prealloc = true;
@@ -1515,11 +1544,11 @@ static bool test_copy_vma(void)
vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags);
vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks);
-
ASSERT_NE(vma_new, vma);
ASSERT_EQ(vma_new->vm_start, 0);
ASSERT_EQ(vma_new->vm_end, 0x2000);
ASSERT_EQ(vma_new->vm_pgoff, 0);
+ vma_assert_attached(vma_new);
cleanup_mm(&mm, &vmi);
@@ -1528,6 +1557,7 @@ static bool test_copy_vma(void)
vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, flags);
vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, flags);
vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks);
+ vma_assert_attached(vma_new);
ASSERT_EQ(vma_new, vma_next);
@@ -1546,7 +1576,7 @@ static bool test_expand_only_mode(void)
/*
* Place a VMA prior to the one we're expanding so we assert that we do
* not erroneously try to traverse to the previous VMA even though we
- * have, through the use of VMG_FLAG_JUST_EXPAND, indicated we do not
+ * have, through the use of the just_expand flag, indicated we do not
* need to do so.
*/
alloc_and_link_vma(&mm, 0, 0x2000, 0, flags);
@@ -1558,7 +1588,7 @@ static bool test_expand_only_mode(void)
vma_iter_set(&vmi, 0x3000);
vma_prev = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags);
vmg.prev = vma_prev;
- vmg.merge_flags = VMG_FLAG_JUST_EXPAND;
+ vmg.just_expand = true;
vma = vma_merge_new_range(&vmg);
ASSERT_NE(vma, NULL);
@@ -1569,6 +1599,7 @@ static bool test_expand_only_mode(void)
ASSERT_EQ(vma->vm_pgoff, 3);
ASSERT_TRUE(vma_write_started(vma));
ASSERT_EQ(vma_iter_addr(&vmi), 0x3000);
+ vma_assert_attached(vma);
cleanup_mm(&mm, &vmi);
return true;
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 1eae23039854..572ab2cea763 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -25,7 +25,7 @@
#include <linux/maple_tree.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
-#include <linux/rwsem.h>
+#include <linux/refcount.h>
extern unsigned long stack_guard_gap;
#ifdef CONFIG_MMU
@@ -135,10 +135,6 @@ typedef __bitwise unsigned int vm_fault_t;
*/
#define pr_warn_once pr_err
-typedef struct refcount_struct {
- atomic_t refs;
-} refcount_t;
-
struct kref {
refcount_t refcount;
};
@@ -233,15 +229,12 @@ struct mm_struct {
unsigned long flags; /* Must use atomic bitops to access */
};
-struct vma_lock {
- struct rw_semaphore lock;
-};
-
-
struct file {
struct address_space *f_mapping;
};
+#define VMA_LOCK_OFFSET 0x40000000
+
struct vm_area_struct {
/* The first cache line has the info for VMA tree walking. */
@@ -269,16 +262,13 @@ struct vm_area_struct {
};
#ifdef CONFIG_PER_VMA_LOCK
- /* Flag to indicate areas detached from the mm->mm_mt tree */
- bool detached;
-
/*
* Can only be written (using WRITE_ONCE()) while holding both:
* - mmap_lock (in write mode)
- * - vm_lock->lock (in write mode)
+ * - vm_refcnt bit at VMA_LOCK_OFFSET is set
* Can be read reliably while holding one of:
* - mmap_lock (in read or write mode)
- * - vm_lock->lock (in read or write mode)
+ * - vm_refcnt bit at VMA_LOCK_OFFSET is set or vm_refcnt > 1
* Can be read unreliably (using READ_ONCE()) for pessimistic bailout
* while holding nothing (except RCU to keep the VMA struct allocated).
*
@@ -287,20 +277,9 @@ struct vm_area_struct {
* slowpath.
*/
unsigned int vm_lock_seq;
- struct vma_lock *vm_lock;
#endif
/*
- * For areas with an address space and backing store,
- * linkage into the address_space->i_mmap interval tree.
- *
- */
- struct {
- struct rb_node rb;
- unsigned long rb_subtree_last;
- } shared;
-
- /*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
* list, after a COW of one of the file pages. A MAP_SHARED vma
* can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
@@ -319,14 +298,6 @@ struct vm_area_struct {
struct file * vm_file; /* File we map to (can be NULL). */
void * vm_private_data; /* was vm_pte (shared mem) */
-#ifdef CONFIG_ANON_VMA_NAME
- /*
- * For private and shared anonymous mappings, a pointer to a null
- * terminated string containing the name given to the vma, or NULL if
- * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
- */
- struct anon_vma_name *anon_name;
-#endif
#ifdef CONFIG_SWAP
atomic_long_t swap_readahead_info;
#endif
@@ -339,6 +310,27 @@ struct vm_area_struct {
#ifdef CONFIG_NUMA_BALANCING
struct vma_numab_state *numab_state; /* NUMA Balancing state */
#endif
+#ifdef CONFIG_PER_VMA_LOCK
+ /* Unstable RCU readers are allowed to read this. */
+ refcount_t vm_refcnt;
+#endif
+ /*
+ * For areas with an address space and backing store,
+ * linkage into the address_space->i_mmap interval tree.
+ *
+ */
+ struct {
+ struct rb_node rb;
+ unsigned long rb_subtree_last;
+ } shared;
+#ifdef CONFIG_ANON_VMA_NAME
+ /*
+ * For private and shared anonymous mappings, a pointer to a null
+ * terminated string containing the name given to the vma, or NULL if
+ * unnamed. Serialized by mmap_lock. Use anon_vma_name to access.
+ */
+ struct anon_vma_name *anon_name;
+#endif
struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
} __randomize_layout;
@@ -464,26 +456,40 @@ static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi)
return mas_find(&vmi->mas, ULONG_MAX);
}
-static inline bool vma_lock_alloc(struct vm_area_struct *vma)
+/*
+ * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
+ * assertions should be made either under mmap_write_lock or when the object
+ * has been isolated under mmap_write_lock, ensuring no competing writers.
+ */
+static inline void vma_assert_attached(struct vm_area_struct *vma)
{
- vma->vm_lock = calloc(1, sizeof(struct vma_lock));
-
- if (!vma->vm_lock)
- return false;
-
- init_rwsem(&vma->vm_lock->lock);
- vma->vm_lock_seq = UINT_MAX;
+ WARN_ON_ONCE(!refcount_read(&vma->vm_refcnt));
+}
- return true;
+static inline void vma_assert_detached(struct vm_area_struct *vma)
+{
+ WARN_ON_ONCE(refcount_read(&vma->vm_refcnt));
}
static inline void vma_assert_write_locked(struct vm_area_struct *);
-static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached)
+static inline void vma_mark_attached(struct vm_area_struct *vma)
{
- /* When detaching vma should be write-locked */
- if (detached)
- vma_assert_write_locked(vma);
- vma->detached = detached;
+ vma_assert_write_locked(vma);
+ vma_assert_detached(vma);
+ refcount_set_release(&vma->vm_refcnt, 1);
+}
+
+static inline void vma_mark_detached(struct vm_area_struct *vma)
+{
+ vma_assert_write_locked(vma);
+ vma_assert_attached(vma);
+ /* We are the only writer, so no need to use vma_refcount_put(). */
+ if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
+ /*
+ * Reader must have temporarily raised vm_refcnt but it will
+ * drop it without using the vma since vma is write-locked.
+ */
+ }
}
extern const struct vm_operations_struct vma_dummy_vm_ops;
@@ -496,7 +502,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
vma->vm_mm = mm;
vma->vm_ops = &vma_dummy_vm_ops;
INIT_LIST_HEAD(&vma->anon_vma_chain);
- vma_mark_detached(vma, false);
+ vma->vm_lock_seq = UINT_MAX;
}
static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
@@ -507,10 +513,6 @@ static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm)
return NULL;
vma_init(vma, mm);
- if (!vma_lock_alloc(vma)) {
- free(vma);
- return NULL;
- }
return vma;
}
@@ -523,10 +525,8 @@ static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
return NULL;
memcpy(new, orig, sizeof(*new));
- if (!vma_lock_alloc(new)) {
- free(new);
- return NULL;
- }
+ refcount_set(&new->vm_refcnt, 0);
+ new->vm_lock_seq = UINT_MAX;
INIT_LIST_HEAD(&new->anon_vma_chain);
return new;
@@ -696,20 +696,9 @@ static inline void mpol_put(struct mempolicy *)
{
}
-static inline void vma_lock_free(struct vm_area_struct *vma)
-{
- free(vma->vm_lock);
-}
-
-static inline void __vm_area_free(struct vm_area_struct *vma)
-{
- vma_lock_free(vma);
- free(vma);
-}
-
static inline void vm_area_free(struct vm_area_struct *vma)
{
- __vm_area_free(vma);
+ free(vma);
}
static inline void lru_add_drain(void)
@@ -796,12 +785,12 @@ static inline void vma_start_write(struct vm_area_struct *vma)
static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
- long adjust_next)
+ struct vm_area_struct *next)
{
(void)vma;
(void)start;
(void)end;
- (void)adjust_next;
+ (void)next;
}
static inline void vma_iter_free(struct vma_iterator *vmi)
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
index 1f3a15b954b9..204ef0e9f542 100644
--- a/tools/virtio/linux/compiler.h
+++ b/tools/virtio/linux/compiler.h
@@ -10,4 +10,29 @@
#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var))))
#define __aligned(x) __attribute((__aligned__(x)))
+
+/**
+ * data_race - mark an expression as containing intentional data races
+ *
+ * This data_race() macro is useful for situations in which data races
+ * should be forgiven. One example is diagnostic code that accesses
+ * shared variables but is not a part of the core synchronization design.
+ * For example, if accesses to a given variable are protected by a lock,
+ * except for diagnostic code, then the accesses under the lock should
+ * be plain C-language accesses and those in the diagnostic code should
+ * use data_race(). This way, KCSAN will complain if buggy lockless
+ * accesses to that variable are introduced, even if the buggy accesses
+ * are protected by READ_ONCE() or WRITE_ONCE().
+ *
+ * This macro *does not* affect normal code generation, but is a hint
+ * to tooling that data races here are to be ignored. If the access must
+ * be atomic *and* KCSAN should ignore the access, use both data_race()
+ * and READ_ONCE(), for example, data_race(READ_ONCE(x)).
+ */
+#define data_race(expr) \
+({ \
+ __auto_type __v = (expr); \
+ __v; \
+})
+
#endif
diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h
index 822ecaa8e4df..095958461788 100644
--- a/tools/virtio/linux/dma-mapping.h
+++ b/tools/virtio/linux/dma-mapping.h
@@ -31,6 +31,7 @@ enum dma_data_direction {
#define dma_unmap_page(d, a, s, r) do { (void)(d); (void)(a); (void)(s); (void)(r); } while (0)
#define sg_dma_address(sg) (0)
+#define sg_dma_len(sg) (0)
#define dma_need_sync(v, a) (0)
#define dma_unmap_single_attrs(d, a, s, r, t) do { \
(void)(d); (void)(a); (void)(s); (void)(r); (void)(t); \
@@ -43,4 +44,16 @@ enum dma_data_direction {
} while (0)
#define dma_max_mapping_size(...) SIZE_MAX
+/*
+ * A dma_addr_t can hold any valid DMA or bus address for the platform. It can
+ * be given to a device to use as a DMA source or target. It is specific to a
+ * given device and there may be a translation between the CPU physical address
+ * space and the bus address space.
+ *
+ * DMA_MAPPING_ERROR is the magic error code if a mapping failed. It should not
+ * be used directly in drivers, but checked for using dma_mapping_error()
+ * instead.
+ */
+#define DMA_MAPPING_ERROR (~(dma_addr_t)0)
+
#endif
diff --git a/tools/virtio/linux/module.h b/tools/virtio/linux/module.h
index 9dfa96fea2b2..b91681fc1571 100644
--- a/tools/virtio/linux/module.h
+++ b/tools/virtio/linux/module.h
@@ -5,3 +5,10 @@
static __attribute__((unused)) const char *__MODULE_LICENSE_name = \
__MODULE_LICENSE_value
+#ifndef MODULE_AUTHOR
+#define MODULE_AUTHOR(x)
+#endif
+
+#ifndef MODULE_DESCRIPTION
+#define MODULE_DESCRIPTION(x)
+#endif