aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/accounting/getdelays.c32
-rw-r--r--tools/arch/arm64/tools/Makefile6
-rw-r--r--tools/arch/x86/include/asm/amd-ibs.h3
-rw-r--r--tools/arch/x86/include/asm/asm.h8
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h8
-rw-r--r--tools/arch/x86/include/asm/disabled-features.h161
-rw-r--r--tools/arch/x86/include/asm/msr-index.h2
-rw-r--r--tools/arch/x86/include/asm/nops.h2
-rw-r--r--tools/arch/x86/include/asm/orc_types.h4
-rw-r--r--tools/arch/x86/include/asm/pvclock-abi.h4
-rw-r--r--tools/arch/x86/include/asm/required-features.h105
-rw-r--r--tools/bpf/Makefile6
-rw-r--r--tools/bpf/bpftool/Documentation/Makefile6
-rw-r--r--tools/bpf/bpftool/Makefile6
-rw-r--r--tools/bpf/resolve_btfids/Makefile2
-rw-r--r--tools/bpf/runqslower/Makefile5
-rw-r--r--tools/build/Makefile8
-rw-r--r--tools/include/linux/bits.h2
-rw-r--r--tools/include/nolibc/Makefile1
-rw-r--r--tools/include/nolibc/arch-mips.h1
-rw-r--r--tools/include/nolibc/arch-s390.h9
-rw-r--r--tools/include/nolibc/arch.h2
-rw-r--r--tools/include/nolibc/crt.h2
-rw-r--r--tools/include/nolibc/dirent.h98
-rw-r--r--tools/include/nolibc/errno.h2
-rw-r--r--tools/include/nolibc/nolibc.h4
-rw-r--r--tools/include/nolibc/signal.h1
-rw-r--r--tools/include/nolibc/stackprotector.h2
-rw-r--r--tools/include/nolibc/stdio.h98
-rw-r--r--tools/include/nolibc/stdlib.h1
-rw-r--r--tools/include/nolibc/string.h4
-rw-r--r--tools/include/nolibc/sys.h83
-rw-r--r--tools/include/uapi/asm-generic/socket.h21
-rw-r--r--tools/include/uapi/linux/const.h2
-rw-r--r--tools/lib/bpf/Makefile13
-rw-r--r--tools/lib/perf/Makefile13
-rw-r--r--tools/lib/thermal/Makefile13
-rw-r--r--tools/memory-model/Documentation/glossary.txt32
-rw-r--r--tools/memory-model/Documentation/herd-representation.txt49
-rw-r--r--tools/memory-model/README4
-rw-r--r--tools/memory-model/linux-kernel.bell33
-rw-r--r--tools/memory-model/linux-kernel.cat10
-rw-r--r--tools/memory-model/linux-kernel.cfg1
-rw-r--r--tools/memory-model/linux-kernel.def169
-rw-r--r--tools/mm/page-types.c4
-rw-r--r--tools/objtool/Documentation/objtool.txt105
-rw-r--r--tools/objtool/Makefile8
-rw-r--r--tools/objtool/arch/loongarch/decode.c28
-rw-r--r--tools/objtool/arch/loongarch/include/arch/elf.h7
-rw-r--r--tools/objtool/arch/loongarch/special.c159
-rw-r--r--tools/objtool/arch/powerpc/decode.c14
-rw-r--r--tools/objtool/arch/x86/decode.c14
-rw-r--r--tools/objtool/builtin-check.c208
-rw-r--r--tools/objtool/check.c112
-rw-r--r--tools/objtool/elf.c3
-rw-r--r--tools/objtool/include/objtool/arch.h3
-rw-r--r--tools/objtool/include/objtool/builtin.h3
-rw-r--r--tools/objtool/include/objtool/elf.h2
-rw-r--r--tools/objtool/include/objtool/special.h2
-rw-r--r--tools/objtool/include/objtool/warn.h20
-rw-r--r--tools/objtool/noreturns.h4
-rw-r--r--tools/objtool/objtool.c78
-rw-r--r--tools/objtool/orc_dump.c7
-rw-r--r--tools/perf/Makefile.perf41
-rwxr-xr-xtools/perf/check-headers.sh2
-rw-r--r--tools/power/x86/turbostat/turbostat.c2
-rw-r--r--tools/sched_ext/include/scx/common.bpf.h59
-rw-r--r--tools/sched_ext/include/scx/common.h1
-rw-r--r--tools/sched_ext/include/scx/compat.bpf.h95
-rw-r--r--tools/sched_ext/include/scx/compat.h16
-rw-r--r--tools/sched_ext/include/scx/enum_defs.autogen.h120
-rw-r--r--tools/sched_ext/scx_central.c15
-rw-r--r--tools/sched_ext/scx_qmap.bpf.c23
-rw-r--r--tools/scripts/Makefile.include30
-rwxr-xr-xtools/sound/dapm-graph2
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/bpf/Makefile.docs6
-rw-r--r--tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c62
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c129
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_strp.c454
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_strp.c53
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_array_access.c15
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_v1_hp.sh2
-rwxr-xr-xtools/testing/selftests/damon/damon_nr_regions.py2
-rwxr-xr-xtools/testing/selftests/damon/damos_quota.py9
-rwxr-xr-xtools/testing/selftests/damon/damos_quota_goal.py3
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/hds.py145
-rwxr-xr-xtools/testing/selftests/drivers/net/ping.py200
-rwxr-xr-xtools/testing/selftests/drivers/net/queues.py7
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/.gitignore2
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/Makefile6
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c516
-rw-r--r--tools/testing/selftests/filesystems/nsfs/iterate_mntns.c14
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/Makefile11
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c507
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/wrappers.h17
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount.h2
-rw-r--r--tools/testing/selftests/filesystems/utils.c501
-rw-r--r--tools/testing/selftests/filesystems/utils.h45
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc58
-rw-r--r--tools/testing/selftests/hid/Makefile2
-rw-r--r--tools/testing/selftests/kselftest.h5
-rwxr-xr-xtools/testing/selftests/kselftest/module.sh2
-rw-r--r--tools/testing/selftests/kvm/mmu_stress_test.c21
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_cpuid.c47
-rw-r--r--tools/testing/selftests/kvm/x86/nested_exceptions_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/sev_smoke_test.c3
-rw-r--r--tools/testing/selftests/landlock/.gitignore2
-rw-r--r--tools/testing/selftests/landlock/common.h1
-rw-r--r--tools/testing/selftests/landlock/config3
-rw-r--r--tools/testing/selftests/landlock/net_test.c124
-rw-r--r--tools/testing/selftests/lib/Makefile2
-rw-r--r--tools/testing/selftests/lib/config3
-rwxr-xr-xtools/testing/selftests/lib/prime_numbers.sh4
-rwxr-xr-xtools/testing/selftests/lib/printf.sh4
-rwxr-xr-xtools/testing/selftests/lib/scanf.sh4
-rw-r--r--tools/testing/selftests/mm/guard-pages.c16
-rw-r--r--tools/testing/selftests/mm/hugepage-mremap.c2
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c8
-rw-r--r--tools/testing/selftests/mm/memfd_secret.c14
-rw-r--r--tools/testing/selftests/mm/mkdirty.c8
-rw-r--r--tools/testing/selftests/mm/mlock2.h1
-rw-r--r--tools/testing/selftests/mm/protection_keys.c2
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh6
-rw-r--r--tools/testing/selftests/mm/uffd-common.c4
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c15
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c14
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c652
-rw-r--r--tools/testing/selftests/net/Makefile1
-rw-r--r--tools/testing/selftests/net/config2
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_port_range.sh46
-rw-r--r--tools/testing/selftests/net/lib/Makefile3
-rw-r--r--tools/testing/selftests/net/lib/xdp_dummy.bpf.c19
-rwxr-xr-xtools/testing/selftests/net/lwt_dst_cache_ref_loop.sh246
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter.sh7
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter_queue.sh7
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh1
-rw-r--r--tools/testing/selftests/nolibc/Makefile30
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test-linkage.c6
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c138
-rwxr-xr-xtools/testing/selftests/nolibc/run-tests.sh26
-rw-r--r--tools/testing/selftests/pidfd/.gitignore2
-rw-r--r--tools/testing/selftests/pidfd/Makefile4
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h109
-rw-r--r--tools/testing/selftests/pidfd/pidfd_exec_helper.c12
-rw-r--r--tools/testing/selftests/pidfd/pidfd_fdinfo_test.c1
-rw-r--r--tools/testing/selftests/pidfd/pidfd_info_test.c692
-rw-r--r--tools/testing/selftests/pidfd/pidfd_open_test.c30
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c45
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c76
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/srcu_lockdep.sh2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot6
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE073
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE103
-rw-r--r--tools/testing/selftests/rseq/.gitignore1
-rw-r--r--tools/testing/selftests/rseq/Makefile9
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv-bits.h6
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv.h2
-rw-r--r--tools/testing/selftests/rseq/rseq.c27
-rw-r--r--tools/testing/selftests/rseq/rseq.h5
-rwxr-xr-xtools/testing/selftests/rseq/run_syscall_errors_test.sh5
-rw-r--r--tools/testing/selftests/rseq/syscall_errors_test.c124
-rw-r--r--tools/testing/selftests/sched/config2
-rw-r--r--tools/testing/selftests/sched_ext/Makefile1
-rw-r--r--tools/testing/selftests/sched_ext/config1
-rw-r--r--tools/testing/selftests/sched_ext/create_dsq.c10
-rw-r--r--tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c7
-rw-r--r--tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c7
-rw-r--r--tools/testing/selftests/sched_ext/dsp_local_on.bpf.c2
-rw-r--r--tools/testing/selftests/sched_ext/dsp_local_on.c1
-rw-r--r--tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c10
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu_fails.c10
-rw-r--r--tools/testing/selftests/sched_ext/exit.c1
-rw-r--r--tools/testing/selftests/sched_ext/hotplug.c6
-rw-r--r--tools/testing/selftests/sched_ext/init_enable_count.c27
-rw-r--r--tools/testing/selftests/sched_ext/maximal.c7
-rw-r--r--tools/testing/selftests/sched_ext/maybe_null.c2
-rw-r--r--tools/testing/selftests/sched_ext/minimal.c10
-rw-r--r--tools/testing/selftests/sched_ext/numa.bpf.c100
-rw-r--r--tools/testing/selftests/sched_ext/numa.c59
-rw-r--r--tools/testing/selftests/sched_ext/prog_run.c10
-rw-r--r--tools/testing/selftests/sched_ext/reload_loop.c9
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dfl.c7
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c7
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dispatch.c7
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c7
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c7
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_vtime.c7
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c6
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json25
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.c10
-rw-r--r--tools/testing/selftests/wireguard/qemu/debug.config1
-rw-r--r--tools/testing/selftests/x86/Makefile6
-rw-r--r--tools/testing/selftests/x86/amx.c442
-rw-r--r--tools/testing/selftests/x86/avx.c12
-rw-r--r--tools/testing/selftests/x86/corrupt_xstate_header.c14
-rw-r--r--tools/testing/selftests/x86/entry_from_vm86.c24
-rw-r--r--tools/testing/selftests/x86/fsgsbase.c24
-rw-r--r--tools/testing/selftests/x86/helpers.h28
-rw-r--r--tools/testing/selftests/x86/ioperm.c25
-rw-r--r--tools/testing/selftests/x86/iopl.c25
-rw-r--r--tools/testing/selftests/x86/lam.c151
-rw-r--r--tools/testing/selftests/x86/ldt_gdt.c18
-rw-r--r--tools/testing/selftests/x86/mov_ss_trap.c14
-rw-r--r--tools/testing/selftests/x86/ptrace_syscall.c24
-rw-r--r--tools/testing/selftests/x86/sigaltstack.c26
-rw-r--r--tools/testing/selftests/x86/sigreturn.c24
-rw-r--r--tools/testing/selftests/x86/single_step_syscall.c22
-rw-r--r--tools/testing/selftests/x86/syscall_arg_fault.c12
-rw-r--r--tools/testing/selftests/x86/syscall_nt.c12
-rw-r--r--tools/testing/selftests/x86/syscall_numbering.c3
-rw-r--r--tools/testing/selftests/x86/sysret_rip.c24
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c13
-rw-r--r--tools/testing/selftests/x86/unwind_vdso.c12
-rw-r--r--tools/testing/selftests/x86/xstate.c477
-rw-r--r--tools/testing/selftests/x86/xstate.h195
-rw-r--r--tools/testing/vsock/vsock_test.c41
-rw-r--r--tools/thermal/lib/Makefile13
-rw-r--r--tools/tracing/latency/Makefile6
-rw-r--r--tools/tracing/rtla/Makefile6
-rw-r--r--tools/verification/rv/Makefile6
226 files changed, 7713 insertions, 2035 deletions
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 100ad3dc091a..3feac0482fe9 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -196,22 +196,22 @@ static int get_family_id(int sd)
static void print_delayacct(struct taskstats *t)
{
- printf("\n\nCPU %15s%15s%15s%15s%15s%15s\n"
- " %15llu%15llu%15llu%15llu%15.3fms%13.6fms\n"
- "IO %15s%15s%15s%15s\n"
- " %15llu%15llu%15.3fms%13.6fms\n"
- "SWAP %15s%15s%15s%15s\n"
- " %15llu%15llu%15.3fms%13.6fms\n"
- "RECLAIM %12s%15s%15s%15s\n"
- " %15llu%15llu%15.3fms%13.6fms\n"
- "THRASHING%12s%15s%15s%15s\n"
- " %15llu%15llu%15.3fms%13.6fms\n"
- "COMPACT %12s%15s%15s%15s\n"
- " %15llu%15llu%15.3fms%13.6fms\n"
- "WPCOPY %12s%15s%15s%15s\n"
- " %15llu%15llu%15.3fms%13.6fms\n"
- "IRQ %15s%15s%15s%15s\n"
- " %15llu%15llu%15.3fms%13.6fms\n",
+ printf("\n\nCPU %15s%15s%15s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+ "IO %15s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+ "SWAP %15s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+ "RECLAIM %12s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+ "THRASHING%12s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+ "COMPACT %12s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+ "WPCOPY %12s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15.3fms%13.6fms%13.6fms\n"
+ "IRQ %15s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15.3fms%13.6fms%13.6fms\n",
"count", "real total", "virtual total",
"delay total", "delay average", "delay max", "delay min",
(unsigned long long)t->cpu_count,
diff --git a/tools/arch/arm64/tools/Makefile b/tools/arch/arm64/tools/Makefile
index 7b42feedf647..de4f1b66ef01 100644
--- a/tools/arch/arm64/tools/Makefile
+++ b/tools/arch/arm64/tools/Makefile
@@ -13,12 +13,6 @@ AWK ?= awk
MKDIR ?= mkdir
RM ?= rm
-ifeq ($(V),1)
-Q =
-else
-Q = @
-endif
-
arm64_tools_dir = $(top_srcdir)/arch/arm64/tools
arm64_sysreg_tbl = $(arm64_tools_dir)/sysreg
arm64_gen_sysreg = $(arm64_tools_dir)/gen-sysreg.awk
diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd-ibs.h
index 93807b437e4d..cb1740bc3da2 100644
--- a/tools/arch/x86/include/asm/amd-ibs.h
+++ b/tools/arch/x86/include/asm/amd-ibs.h
@@ -64,7 +64,8 @@ union ibs_op_ctl {
opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */
reserved0:5, /* 27-31: reserved */
opcurcnt:27, /* 32-58: periodic op counter current count */
- reserved1:5; /* 59-63: reserved */
+ ldlat_thrsh:4, /* 59-62: Load Latency threshold */
+ ldlat_en:1; /* 63: Load Latency enabled */
};
};
diff --git a/tools/arch/x86/include/asm/asm.h b/tools/arch/x86/include/asm/asm.h
index 3ad3da9a7d97..dbe39b44256b 100644
--- a/tools/arch/x86/include/asm/asm.h
+++ b/tools/arch/x86/include/asm/asm.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_ASM_H
#define _ASM_X86_ASM_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
# define __ASM_FORM(x, ...) x,## __VA_ARGS__
# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__
# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__,
@@ -123,7 +123,7 @@
#ifdef __KERNEL__
/* Exception table entry */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
.pushsection "__ex_table","a" ; \
.balign 4 ; \
@@ -154,7 +154,7 @@
# define _ASM_NOKPROBE(entry)
# endif
-#else /* ! __ASSEMBLY__ */
+#else /* ! __ASSEMBLER__ */
# define _EXPAND_EXTABLE_HANDLE(x) #x
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
" .pushsection \"__ex_table\",\"a\"\n" \
@@ -186,7 +186,7 @@
*/
register unsigned long current_stack_pointer asm(_ASM_SP);
#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __KERNEL__ */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 17b6590748c0..c691481d59ce 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -2,14 +2,6 @@
#ifndef _ASM_X86_CPUFEATURES_H
#define _ASM_X86_CPUFEATURES_H
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#include <asm/required-features.h>
-#endif
-
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#include <asm/disabled-features.h>
-#endif
-
/*
* Defines x86 CPU feature bits
*/
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
deleted file mode 100644
index c492bdc97b05..000000000000
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ /dev/null
@@ -1,161 +0,0 @@
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#define _ASM_X86_DISABLED_FEATURES_H
-
-/* These features, although they might be available in a CPU
- * will not be used because the compile options to support
- * them are not present.
- *
- * This code allows them to be checked and disabled at
- * compile time without an explicit #ifdef. Use
- * cpu_feature_enabled().
- */
-
-#ifdef CONFIG_X86_UMIP
-# define DISABLE_UMIP 0
-#else
-# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31))
-#endif
-
-#ifdef CONFIG_X86_64
-# define DISABLE_VME (1<<(X86_FEATURE_VME & 31))
-# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
-# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
-# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
-# define DISABLE_PCID 0
-#else
-# define DISABLE_VME 0
-# define DISABLE_K6_MTRR 0
-# define DISABLE_CYRIX_ARR 0
-# define DISABLE_CENTAUR_MCR 0
-# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
-#endif /* CONFIG_X86_64 */
-
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-# define DISABLE_PKU 0
-# define DISABLE_OSPKE 0
-#else
-# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31))
-# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31))
-#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
-
-#ifdef CONFIG_X86_5LEVEL
-# define DISABLE_LA57 0
-#else
-# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
-# define DISABLE_PTI 0
-#else
-# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_RETPOLINE
-# define DISABLE_RETPOLINE 0
-#else
-# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
- (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
-#endif
-
-#ifdef CONFIG_MITIGATION_RETHUNK
-# define DISABLE_RETHUNK 0
-#else
-# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_UNRET_ENTRY
-# define DISABLE_UNRET 0
-#else
-# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
-# define DISABLE_CALL_DEPTH_TRACKING 0
-#else
-# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
-#endif
-
-#ifdef CONFIG_ADDRESS_MASKING
-# define DISABLE_LAM 0
-#else
-# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31))
-#endif
-
-#ifdef CONFIG_INTEL_IOMMU_SVM
-# define DISABLE_ENQCMD 0
-#else
-# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
-#endif
-
-#ifdef CONFIG_X86_SGX
-# define DISABLE_SGX 0
-#else
-# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31))
-#endif
-
-#ifdef CONFIG_XEN_PV
-# define DISABLE_XENPV 0
-#else
-# define DISABLE_XENPV (1 << (X86_FEATURE_XENPV & 31))
-#endif
-
-#ifdef CONFIG_INTEL_TDX_GUEST
-# define DISABLE_TDX_GUEST 0
-#else
-# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31))
-#endif
-
-#ifdef CONFIG_X86_USER_SHADOW_STACK
-#define DISABLE_USER_SHSTK 0
-#else
-#define DISABLE_USER_SHSTK (1 << (X86_FEATURE_USER_SHSTK & 31))
-#endif
-
-#ifdef CONFIG_X86_KERNEL_IBT
-#define DISABLE_IBT 0
-#else
-#define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31))
-#endif
-
-#ifdef CONFIG_X86_FRED
-# define DISABLE_FRED 0
-#else
-# define DISABLE_FRED (1 << (X86_FEATURE_FRED & 31))
-#endif
-
-#ifdef CONFIG_KVM_AMD_SEV
-#define DISABLE_SEV_SNP 0
-#else
-#define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31))
-#endif
-
-/*
- * Make sure to add features to the correct mask
- */
-#define DISABLED_MASK0 (DISABLE_VME)
-#define DISABLED_MASK1 0
-#define DISABLED_MASK2 0
-#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4 (DISABLE_PCID)
-#define DISABLED_MASK5 0
-#define DISABLED_MASK6 0
-#define DISABLED_MASK7 (DISABLE_PTI)
-#define DISABLED_MASK8 (DISABLE_XENPV|DISABLE_TDX_GUEST)
-#define DISABLED_MASK9 (DISABLE_SGX)
-#define DISABLED_MASK10 0
-#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
- DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
-#define DISABLED_MASK12 (DISABLE_FRED|DISABLE_LAM)
-#define DISABLED_MASK13 0
-#define DISABLED_MASK14 0
-#define DISABLED_MASK15 0
-#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
- DISABLE_ENQCMD)
-#define DISABLED_MASK17 0
-#define DISABLED_MASK18 (DISABLE_IBT)
-#define DISABLED_MASK19 (DISABLE_SEV_SNP)
-#define DISABLED_MASK20 0
-#define DISABLED_MASK21 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
-
-#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 3ae84c3b8e6d..dc1c1057f26e 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -25,6 +25,7 @@
#define _EFER_SVME 12 /* Enable virtualization */
#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */
#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
+#define _EFER_TCE 15 /* Enable Translation Cache Extensions */
#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */
#define EFER_SCE (1<<_EFER_SCE)
@@ -34,6 +35,7 @@
#define EFER_SVME (1<<_EFER_SVME)
#define EFER_LMSLE (1<<_EFER_LMSLE)
#define EFER_FFXSR (1<<_EFER_FFXSR)
+#define EFER_TCE (1<<_EFER_TCE)
#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
/*
diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h
index 1c1b7550fa55..cd94221d8335 100644
--- a/tools/arch/x86/include/asm/nops.h
+++ b/tools/arch/x86/include/asm/nops.h
@@ -82,7 +82,7 @@
#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern const unsigned char * const x86_nops[];
#endif
diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h
index 46d7e06763c9..e0125afa53fb 100644
--- a/tools/arch/x86/include/asm/orc_types.h
+++ b/tools/arch/x86/include/asm/orc_types.h
@@ -45,7 +45,7 @@
#define ORC_TYPE_REGS 3
#define ORC_TYPE_REGS_PARTIAL 4
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/byteorder.h>
/*
@@ -73,6 +73,6 @@ struct orc_entry {
#endif
} __packed;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ORC_TYPES_H */
diff --git a/tools/arch/x86/include/asm/pvclock-abi.h b/tools/arch/x86/include/asm/pvclock-abi.h
index 1436226efe3e..b9fece5fc96d 100644
--- a/tools/arch/x86/include/asm/pvclock-abi.h
+++ b/tools/arch/x86/include/asm/pvclock-abi.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PVCLOCK_ABI_H
#define _ASM_X86_PVCLOCK_ABI_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* These structs MUST NOT be changed.
@@ -44,5 +44,5 @@ struct pvclock_wall_clock {
#define PVCLOCK_GUEST_STOPPED (1 << 1)
/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */
#define PVCLOCK_COUNTS_FROM_ZERO (1 << 2)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
deleted file mode 100644
index e9187ddd3d1f..000000000000
--- a/tools/arch/x86/include/asm/required-features.h
+++ /dev/null
@@ -1,105 +0,0 @@
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#define _ASM_X86_REQUIRED_FEATURES_H
-
-/* Define minimum CPUID feature set for kernel These bits are checked
- really early to actually display a visible error message before the
- kernel dies. Make sure to assign features to the proper mask!
-
- Some requirements that are not in CPUID yet are also in the
- CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too.
-
- The real information is in arch/x86/Kconfig.cpu, this just converts
- the CONFIGs into a bitmask */
-
-#ifndef CONFIG_MATH_EMULATION
-# define NEED_FPU (1<<(X86_FEATURE_FPU & 31))
-#else
-# define NEED_FPU 0
-#endif
-
-#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-# define NEED_PAE (1<<(X86_FEATURE_PAE & 31))
-#else
-# define NEED_PAE 0
-#endif
-
-#ifdef CONFIG_X86_CMPXCHG64
-# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31))
-#else
-# define NEED_CX8 0
-#endif
-
-#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64)
-# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31))
-#else
-# define NEED_CMOV 0
-#endif
-
-# define NEED_3DNOW 0
-
-#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
-# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31))
-#else
-# define NEED_NOPL 0
-#endif
-
-#ifdef CONFIG_MATOM
-# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
-#else
-# define NEED_MOVBE 0
-#endif
-
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT_XXL
-/* Paravirtualized systems may not have PSE or PGE available */
-#define NEED_PSE 0
-#define NEED_PGE 0
-#else
-#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31)
-#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31)
-#endif
-#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
-#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
-#define NEED_XMM (1<<(X86_FEATURE_XMM & 31))
-#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31))
-#define NEED_LM (1<<(X86_FEATURE_LM & 31))
-#else
-#define NEED_PSE 0
-#define NEED_MSR 0
-#define NEED_PGE 0
-#define NEED_FXSR 0
-#define NEED_XMM 0
-#define NEED_XMM2 0
-#define NEED_LM 0
-#endif
-
-#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\
- NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\
- NEED_XMM|NEED_XMM2)
-#define SSE_MASK (NEED_XMM|NEED_XMM2)
-
-#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW)
-
-#define REQUIRED_MASK2 0
-#define REQUIRED_MASK3 (NEED_NOPL)
-#define REQUIRED_MASK4 (NEED_MOVBE)
-#define REQUIRED_MASK5 0
-#define REQUIRED_MASK6 0
-#define REQUIRED_MASK7 0
-#define REQUIRED_MASK8 0
-#define REQUIRED_MASK9 0
-#define REQUIRED_MASK10 0
-#define REQUIRED_MASK11 0
-#define REQUIRED_MASK12 0
-#define REQUIRED_MASK13 0
-#define REQUIRED_MASK14 0
-#define REQUIRED_MASK15 0
-#define REQUIRED_MASK16 0
-#define REQUIRED_MASK17 0
-#define REQUIRED_MASK18 0
-#define REQUIRED_MASK19 0
-#define REQUIRED_MASK20 0
-#define REQUIRED_MASK21 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
-
-#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 243b79f2b451..062bbd6cd048 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -27,12 +27,6 @@ srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
endif
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
FEATURE_USER = .bpf
FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled
FEATURE_DISPLAY = libbfd
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index 4315652678b9..bf843f328812 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -5,12 +5,6 @@ INSTALL ?= install
RM ?= rm -f
RMDIR ?= rmdir --ignore-fail-on-non-empty
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
prefix ?= /usr/local
mandir ?= $(prefix)/man
man8dir = $(mandir)/man8
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index dd9f3ec84201..6ea4823b770c 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -7,12 +7,6 @@ srctree := $(patsubst %/,%,$(dir $(srctree)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
endif
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
BPF_DIR = $(srctree)/tools/lib/bpf
ifneq ($(OUTPUT),)
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index 4b8079f294f6..afbddea3a39c 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -5,10 +5,8 @@ include ../../scripts/Makefile.arch
srctree := $(abspath $(CURDIR)/../../../)
ifeq ($(V),1)
- Q =
msg =
else
- Q = @
ifeq ($(silent),1)
msg =
else
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index c4f1f1735af6..e49203ebd48c 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -26,10 +26,7 @@ VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \
$(wildcard $(VMLINUX_BTF_PATHS))))
-ifeq ($(V),1)
-Q =
-else
-Q = @
+ifneq ($(V),1)
MAKEFLAGS += --no-print-directory
submake_extras := feature_display=0
endif
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 18ad131f6ea7..63ef21878761 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -17,13 +17,7 @@ $(call allow-override,LD,$(CROSS_COMPILE)ld)
export HOSTCC HOSTLD HOSTAR
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
-export Q srctree CC LD
+export srctree CC LD
MAKEFLAGS := --no-print-directory
build := -f $(srctree)/tools/build/Makefile.build dir=. obj
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
index 60044b608817..8de2914e6510 100644
--- a/tools/include/linux/bits.h
+++ b/tools/include/linux/bits.h
@@ -41,7 +41,7 @@
* Missing asm support
*
* __GENMASK_U128() depends on _BIT128() which would not work
- * in the asm code, as it shifts an 'unsigned __init128' data
+ * in the asm code, as it shifts an 'unsigned __int128' data
* type instead of direct representation of 128 bit constants
* such as long and unsigned long. The fundamental problem is
* that a 128 bit constant will get silently truncated by the
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
index a1f55fb24bb3..dceec0e1a135 100644
--- a/tools/include/nolibc/Makefile
+++ b/tools/include/nolibc/Makefile
@@ -29,6 +29,7 @@ all_files := \
compiler.h \
crt.h \
ctype.h \
+ dirent.h \
errno.h \
nolibc.h \
signal.h \
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index 1791a8ce58da..753a8ed2cf69 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -179,6 +179,7 @@
})
/* startup code, note that it's called __start on MIPS */
+void __start(void);
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void)
{
__asm__ volatile (
diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h
index f9ab83a219b8..df4c3cc713ac 100644
--- a/tools/include/nolibc/arch-s390.h
+++ b/tools/include/nolibc/arch-s390.h
@@ -5,8 +5,8 @@
#ifndef _NOLIBC_ARCH_S390_H
#define _NOLIBC_ARCH_S390_H
-#include <asm/signal.h>
-#include <asm/unistd.h>
+#include <linux/signal.h>
+#include <linux/unistd.h>
#include "compiler.h"
#include "crt.h"
@@ -143,8 +143,13 @@
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
__asm__ volatile (
+#ifdef __s390x__
"lgr %r2, %r15\n" /* save stack pointer to %r2, as arg1 of _start_c */
"aghi %r15, -160\n" /* allocate new stackframe */
+#else
+ "lr %r2, %r15\n"
+ "ahi %r15, -96\n"
+#endif
"xc 0(8,%r15), 0(%r15)\n" /* clear backchain */
"brasl %r14, _start_c\n" /* transfer to c runtime */
);
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
index c8f4e5d3add9..8a2c143c0fba 100644
--- a/tools/include/nolibc/arch.h
+++ b/tools/include/nolibc/arch.h
@@ -29,7 +29,7 @@
#include "arch-powerpc.h"
#elif defined(__riscv)
#include "arch-riscv.h"
-#elif defined(__s390x__)
+#elif defined(__s390x__) || defined(__s390__)
#include "arch-s390.h"
#elif defined(__loongarch__)
#include "arch-loongarch.h"
diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h
index bbcd5fd09806..c4b10103bbec 100644
--- a/tools/include/nolibc/crt.h
+++ b/tools/include/nolibc/crt.h
@@ -10,6 +10,7 @@
char **environ __attribute__((weak));
const unsigned long *_auxv __attribute__((weak));
+void _start(void);
static void __stack_chk_init(void);
static void exit(int);
@@ -22,6 +23,7 @@ extern void (*const __init_array_end[])(int, char **, char**) __attribute__((wea
extern void (*const __fini_array_start[])(void) __attribute__((weak));
extern void (*const __fini_array_end[])(void) __attribute__((weak));
+void _start_c(long *sp);
__attribute__((weak,used))
void _start_c(long *sp)
{
diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h
new file mode 100644
index 000000000000..c5c30d0dd680
--- /dev/null
+++ b/tools/include/nolibc/dirent.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Directory access for NOLIBC
+ * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+#ifndef _NOLIBC_DIRENT_H
+#define _NOLIBC_DIRENT_H
+
+#include "stdint.h"
+#include "types.h"
+
+#include <linux/limits.h>
+
+struct dirent {
+ ino_t d_ino;
+ char d_name[NAME_MAX + 1];
+};
+
+/* See comment of FILE in stdio.h */
+typedef struct {
+ char dummy[1];
+} DIR;
+
+static __attribute__((unused))
+DIR *fdopendir(int fd)
+{
+ if (fd < 0) {
+ SET_ERRNO(EBADF);
+ return NULL;
+ }
+ return (DIR *)(intptr_t)~fd;
+}
+
+static __attribute__((unused))
+DIR *opendir(const char *name)
+{
+ int fd;
+
+ fd = open(name, O_RDONLY);
+ if (fd == -1)
+ return NULL;
+ return fdopendir(fd);
+}
+
+static __attribute__((unused))
+int closedir(DIR *dirp)
+{
+ intptr_t i = (intptr_t)dirp;
+
+ if (i >= 0) {
+ SET_ERRNO(EBADF);
+ return -1;
+ }
+ return close(~i);
+}
+
+static __attribute__((unused))
+int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result)
+{
+ char buf[sizeof(struct linux_dirent64) + NAME_MAX + 1];
+ struct linux_dirent64 *ldir = (void *)buf;
+ intptr_t i = (intptr_t)dirp;
+ int fd, ret;
+
+ if (i >= 0)
+ return EBADF;
+
+ fd = ~i;
+
+ ret = sys_getdents64(fd, ldir, sizeof(buf));
+ if (ret < 0)
+ return -ret;
+ if (ret == 0) {
+ *result = NULL;
+ return 0;
+ }
+
+ /*
+ * getdents64() returns as many entries as fit the buffer.
+ * readdir() can only return one entry at a time.
+ * Make sure the non-returned ones are not skipped.
+ */
+ ret = lseek(fd, ldir->d_off, SEEK_SET);
+ if (ret == -1)
+ return errno;
+
+ entry->d_ino = ldir->d_ino;
+ /* the destination should always be big enough */
+ strlcpy(entry->d_name, ldir->d_name, sizeof(entry->d_name));
+ *result = entry;
+ return 0;
+}
+
+/* make sure to include all global symbols */
+#include "nolibc.h"
+
+#endif /* _NOLIBC_DIRENT_H */
diff --git a/tools/include/nolibc/errno.h b/tools/include/nolibc/errno.h
index a44486ff0477..1d8d8033e8ff 100644
--- a/tools/include/nolibc/errno.h
+++ b/tools/include/nolibc/errno.h
@@ -7,7 +7,7 @@
#ifndef _NOLIBC_ERRNO_H
#define _NOLIBC_ERRNO_H
-#include <asm/errno.h>
+#include <linux/errno.h>
#ifndef NOLIBC_IGNORE_ERRNO
#define SET_ERRNO(v) do { errno = (v); } while (0)
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 92436b1e4441..70872401aca8 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -31,8 +31,7 @@
* - The third level is the libc call definition. It exposes the lower raw
* sys_<name>() calls in a way that looks like what a libc usually does,
* takes care of specific input values, and of setting errno upon error.
- * There can be minor variations compared to standard libc calls. For
- * example the open() call always takes 3 args here.
+ * There can be minor variations compared to standard libc calls.
*
* The errno variable is declared static and unused. This way it can be
* optimized away if not used. However this means that a program made of
@@ -105,6 +104,7 @@
#include "string.h"
#include "time.h"
#include "stackprotector.h"
+#include "dirent.h"
/* Used by programs to avoid std includes */
#define NOLIBC
diff --git a/tools/include/nolibc/signal.h b/tools/include/nolibc/signal.h
index 137552216e46..cdcc5904c51e 100644
--- a/tools/include/nolibc/signal.h
+++ b/tools/include/nolibc/signal.h
@@ -13,6 +13,7 @@
#include "sys.h"
/* This one is not marked static as it's needed by libgcc for divide by zero */
+int raise(int signal);
__attribute__((weak,unused,section(".text.nolibc_raise")))
int raise(int signal)
{
diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h
index 1d0d5259ec41..c71a2c257177 100644
--- a/tools/include/nolibc/stackprotector.h
+++ b/tools/include/nolibc/stackprotector.h
@@ -18,6 +18,7 @@
* triggering stack protector errors themselves
*/
+void __stack_chk_fail(void);
__attribute__((weak,used,noreturn,section(".text.nolibc_stack_chk")))
void __stack_chk_fail(void)
{
@@ -28,6 +29,7 @@ void __stack_chk_fail(void)
for (;;);
}
+void __stack_chk_fail_local(void);
__attribute__((weak,noreturn,section(".text.nolibc_stack_chk")))
void __stack_chk_fail_local(void)
{
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index 3892034198dd..a403351dbf60 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -350,6 +350,104 @@ int printf(const char *fmt, ...)
}
static __attribute__((unused))
+int vsscanf(const char *str, const char *format, va_list args)
+{
+ uintmax_t uval;
+ intmax_t ival;
+ int base;
+ char *endptr;
+ int matches;
+ int lpref;
+
+ matches = 0;
+
+ while (1) {
+ if (*format == '%') {
+ /* start of pattern */
+ lpref = 0;
+ format++;
+
+ if (*format == 'l') {
+ /* same as in printf() */
+ lpref = 1;
+ format++;
+ if (*format == 'l') {
+ lpref = 2;
+ format++;
+ }
+ }
+
+ if (*format == '%') {
+ /* literal % */
+ if ('%' != *str)
+ goto done;
+ str++;
+ format++;
+ continue;
+ } else if (*format == 'd') {
+ ival = strtoll(str, &endptr, 10);
+ if (lpref == 0)
+ *va_arg(args, int *) = ival;
+ else if (lpref == 1)
+ *va_arg(args, long *) = ival;
+ else if (lpref == 2)
+ *va_arg(args, long long *) = ival;
+ } else if (*format == 'u' || *format == 'x' || *format == 'X') {
+ base = *format == 'u' ? 10 : 16;
+ uval = strtoull(str, &endptr, base);
+ if (lpref == 0)
+ *va_arg(args, unsigned int *) = uval;
+ else if (lpref == 1)
+ *va_arg(args, unsigned long *) = uval;
+ else if (lpref == 2)
+ *va_arg(args, unsigned long long *) = uval;
+ } else if (*format == 'p') {
+ *va_arg(args, void **) = (void *)strtoul(str, &endptr, 16);
+ } else {
+ SET_ERRNO(EILSEQ);
+ goto done;
+ }
+
+ format++;
+ str = endptr;
+ matches++;
+
+ } else if (*format == '\0') {
+ goto done;
+ } else if (isspace(*format)) {
+ /* skip spaces in format and str */
+ while (isspace(*format))
+ format++;
+ while (isspace(*str))
+ str++;
+ } else if (*format == *str) {
+ /* literal match */
+ format++;
+ str++;
+ } else {
+ if (!matches)
+ matches = EOF;
+ goto done;
+ }
+ }
+
+done:
+ return matches;
+}
+
+static __attribute__((unused, format(scanf, 2, 3)))
+int sscanf(const char *str, const char *format, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, format);
+ ret = vsscanf(str, format, args);
+ va_end(args);
+ return ret;
+}
+
+static __attribute__((unused))
void perror(const char *msg)
{
fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno);
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index 75aa273c23a6..86ad378ab1ea 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -30,6 +30,7 @@ static __attribute__((unused)) char itoa_buffer[21];
*/
/* must be exported, as it's used by libgcc for various divide functions */
+void abort(void);
__attribute__((weak,unused,noreturn,section(".text.nolibc_abort")))
void abort(void)
{
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
index 9ec9c24f38c0..ba84ab700e30 100644
--- a/tools/include/nolibc/string.h
+++ b/tools/include/nolibc/string.h
@@ -32,6 +32,7 @@ int memcmp(const void *s1, const void *s2, size_t n)
/* might be ignored by the compiler without -ffreestanding, then found as
* missing.
*/
+void *memmove(void *dst, const void *src, size_t len);
__attribute__((weak,unused,section(".text.nolibc_memmove")))
void *memmove(void *dst, const void *src, size_t len)
{
@@ -56,6 +57,7 @@ void *memmove(void *dst, const void *src, size_t len)
#ifndef NOLIBC_ARCH_HAS_MEMCPY
/* must be exported, as it's used by libgcc on ARM */
+void *memcpy(void *dst, const void *src, size_t len);
__attribute__((weak,unused,section(".text.nolibc_memcpy")))
void *memcpy(void *dst, const void *src, size_t len)
{
@@ -73,6 +75,7 @@ void *memcpy(void *dst, const void *src, size_t len)
/* might be ignored by the compiler without -ffreestanding, then found as
* missing.
*/
+void *memset(void *dst, int b, size_t len);
__attribute__((weak,unused,section(".text.nolibc_memset")))
void *memset(void *dst, int b, size_t len)
{
@@ -124,6 +127,7 @@ char *strcpy(char *dst, const char *src)
* thus itself, hence the asm() statement below that's meant to disable this
* confusing practice.
*/
+size_t strlen(const char *str);
__attribute__((weak,unused,section(".text.nolibc_strlen")))
size_t strlen(const char *str)
{
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index d4a5c2399a66..08c1c074bec8 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -10,10 +10,10 @@
#include "std.h"
/* system includes */
-#include <asm/unistd.h>
-#include <asm/signal.h> /* for SIGCHLD */
-#include <asm/ioctls.h>
-#include <asm/mman.h>
+#include <linux/unistd.h>
+#include <linux/signal.h> /* for SIGCHLD */
+#include <linux/termios.h>
+#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/loop.h>
#include <linux/time.h>
@@ -23,7 +23,6 @@
#include <linux/prctl.h>
#include <linux/resource.h>
#include <linux/utsname.h>
-#include <linux/signal.h>
#include "arch.h"
#include "errno.h"
@@ -532,20 +531,16 @@ uid_t getuid(void)
/*
- * int ioctl(int fd, unsigned long req, void *value);
+ * int ioctl(int fd, unsigned long cmd, ... arg);
*/
static __attribute__((unused))
-int sys_ioctl(int fd, unsigned long req, void *value)
+long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
- return my_syscall3(__NR_ioctl, fd, req, value);
+ return my_syscall3(__NR_ioctl, fd, cmd, arg);
}
-static __attribute__((unused))
-int ioctl(int fd, unsigned long req, void *value)
-{
- return __sysret(sys_ioctl(fd, req, value));
-}
+#define ioctl(fd, cmd, arg) __sysret(sys_ioctl(fd, cmd, (unsigned long)(arg)))
/*
* int kill(pid_t pid, int signal);
@@ -602,9 +597,36 @@ off_t sys_lseek(int fd, off_t offset, int whence)
}
static __attribute__((unused))
+int sys_llseek(int fd, unsigned long offset_high, unsigned long offset_low,
+ __kernel_loff_t *result, int whence)
+{
+#ifdef __NR_llseek
+ return my_syscall5(__NR_llseek, fd, offset_high, offset_low, result, whence);
+#else
+ return __nolibc_enosys(__func__, fd, offset_high, offset_low, result, whence);
+#endif
+}
+
+static __attribute__((unused))
off_t lseek(int fd, off_t offset, int whence)
{
- return __sysret(sys_lseek(fd, offset, whence));
+ __kernel_loff_t loff = 0;
+ off_t result;
+ int ret;
+
+ result = sys_lseek(fd, offset, whence);
+ if (result == -ENOSYS) {
+ /* Only exists on 32bit where nolibc off_t is also 32bit */
+ ret = sys_llseek(fd, 0, offset, &loff, whence);
+ if (ret < 0)
+ result = ret;
+ else if (loff != (off_t)loff)
+ result = -EOVERFLOW;
+ else
+ result = loff;
+ }
+
+ return __sysret(result);
}
@@ -742,6 +764,31 @@ int mount(const char *src, const char *tgt,
return __sysret(sys_mount(src, tgt, fst, flags, data));
}
+/*
+ * int openat(int dirfd, const char *path, int flags[, mode_t mode]);
+ */
+
+static __attribute__((unused))
+int sys_openat(int dirfd, const char *path, int flags, mode_t mode)
+{
+ return my_syscall4(__NR_openat, dirfd, path, flags, mode);
+}
+
+static __attribute__((unused))
+int openat(int dirfd, const char *path, int flags, ...)
+{
+ mode_t mode = 0;
+
+ if (flags & O_CREAT) {
+ va_list args;
+
+ va_start(args, flags);
+ mode = va_arg(args, mode_t);
+ va_end(args);
+ }
+
+ return __sysret(sys_openat(dirfd, path, flags, mode));
+}
/*
* int open(const char *path, int flags[, mode_t mode]);
@@ -750,13 +797,7 @@ int mount(const char *src, const char *tgt,
static __attribute__((unused))
int sys_open(const char *path, int flags, mode_t mode)
{
-#ifdef __NR_openat
return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
-#elif defined(__NR_open)
- return my_syscall3(__NR_open, path, flags, mode);
-#else
- return __nolibc_enosys(__func__, path, flags, mode);
-#endif
}
static __attribute__((unused))
@@ -768,7 +809,7 @@ int open(const char *path, int flags, ...)
va_list args;
va_start(args, flags);
- mode = va_arg(args, int);
+ mode = va_arg(args, mode_t);
va_end(args);
}
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
index ffff554a5230..aa5016ff3d91 100644
--- a/tools/include/uapi/asm-generic/socket.h
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -119,14 +119,31 @@
#define SO_DETACH_REUSEPORT_BPF 68
+#define SO_PREFER_BUSY_POLL 69
+#define SO_BUSY_POLL_BUDGET 70
+
+#define SO_NETNS_COOKIE 71
+
+#define SO_BUF_LOCK 72
+
+#define SO_RESERVE_MEM 73
+
+#define SO_TXREHASH 74
+
#define SO_RCVMARK 75
#define SO_PASSPIDFD 76
#define SO_PEERPIDFD 77
-#define SCM_TS_OPT_ID 78
+#define SO_DEVMEM_LINEAR 78
+#define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR
+#define SO_DEVMEM_DMABUF 79
+#define SCM_DEVMEM_DMABUF SO_DEVMEM_DMABUF
+#define SO_DEVMEM_DONTNEED 80
+
+#define SCM_TS_OPT_ID 81
-#define SO_RCVPRIORITY 79
+#define SO_RCVPRIORITY 82
#if !defined(__KERNEL__)
diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h
index e16be0d37746..b8f629ef135f 100644
--- a/tools/include/uapi/linux/const.h
+++ b/tools/include/uapi/linux/const.h
@@ -33,7 +33,7 @@
* Missing asm support
*
* __BIT128() would not work in the asm code, as it shifts an
- * 'unsigned __init128' data type as direct representation of
+ * 'unsigned __int128' data type as direct representation of
* 128 bit constants is not supported in the gcc compiler, as
* they get silently truncated.
*
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 857a5f7b413d..168140f8e646 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -53,13 +53,6 @@ include $(srctree)/tools/scripts/Makefile.include
# copy a bit from Linux kbuild
-ifeq ("$(origin V)", "command line")
- VERBOSE = $(V)
-endif
-ifndef VERBOSE
- VERBOSE = 0
-endif
-
INCLUDES = -I$(or $(OUTPUT),.) \
-I$(srctree)/tools/include -I$(srctree)/tools/include/uapi \
-I$(srctree)/tools/arch/$(SRCARCH)/include
@@ -96,12 +89,6 @@ override CFLAGS += $(CLANG_CROSS_FLAGS)
# flags specific for shared library
SHLIB_FLAGS := -DSHARED -fPIC
-ifeq ($(VERBOSE),1)
- Q =
-else
- Q = @
-endif
-
# Disable command line variables (CFLAGS) override from top
# level Makefile (perf), otherwise build Makefile will get
# the same command line setup.
diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index 3a9b2140aa04..e9a7ac2c062e 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative)
libdir_SQ = $(subst ','\'',$(libdir))
libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-ifeq ("$(origin V)", "command line")
- VERBOSE = $(V)
-endif
-ifndef VERBOSE
- VERBOSE = 0
-endif
-
-ifeq ($(VERBOSE),1)
- Q =
-else
- Q = @
-endif
-
TEST_ARGS := $(if $(V),-v)
# Set compile option CFLAGS
diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile
index 8890fd57b110..a1f5e388644d 100644
--- a/tools/lib/thermal/Makefile
+++ b/tools/lib/thermal/Makefile
@@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative)
libdir_SQ = $(subst ','\'',$(libdir))
libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-ifeq ("$(origin V)", "command line")
- VERBOSE = $(V)
-endif
-ifndef VERBOSE
- VERBOSE = 0
-endif
-
-ifeq ($(VERBOSE),1)
- Q =
-else
- Q = @
-endif
-
# Set compile option CFLAGS
ifdef EXTRA_CFLAGS
CFLAGS := $(EXTRA_CFLAGS)
diff --git a/tools/memory-model/Documentation/glossary.txt b/tools/memory-model/Documentation/glossary.txt
index 6f3d16dbf467..7ead94bffa4e 100644
--- a/tools/memory-model/Documentation/glossary.txt
+++ b/tools/memory-model/Documentation/glossary.txt
@@ -15,14 +15,14 @@ Address Dependency: When the address of a later memory access is computed
3 do_something(p->a);
4 rcu_read_unlock();
- In this case, because the address of "p->a" on line 3 is computed
- from the value returned by the rcu_dereference() on line 2, the
- address dependency extends from that rcu_dereference() to that
- "p->a". In rare cases, optimizing compilers can destroy address
- dependencies. Please see Documentation/RCU/rcu_dereference.rst
- for more information.
+ In this case, because the address of "p->a" on line 3 is computed
+ from the value returned by the rcu_dereference() on line 2, the
+ address dependency extends from that rcu_dereference() to that
+ "p->a". In rare cases, optimizing compilers can destroy address
+ dependencies. Please see Documentation/RCU/rcu_dereference.rst
+ for more information.
- See also "Control Dependency" and "Data Dependency".
+ See also "Control Dependency" and "Data Dependency".
Acquire: With respect to a lock, acquiring that lock, for example,
using spin_lock(). With respect to a non-lock shared variable,
@@ -59,12 +59,12 @@ Control Dependency: When a later store's execution depends on a test
1 if (READ_ONCE(x))
2 WRITE_ONCE(y, 1);
- Here, the control dependency extends from the READ_ONCE() on
- line 1 to the WRITE_ONCE() on line 2. Control dependencies are
- fragile, and can be easily destroyed by optimizing compilers.
- Please see control-dependencies.txt for more information.
+ Here, the control dependency extends from the READ_ONCE() on
+ line 1 to the WRITE_ONCE() on line 2. Control dependencies are
+ fragile, and can be easily destroyed by optimizing compilers.
+ Please see control-dependencies.txt for more information.
- See also "Address Dependency" and "Data Dependency".
+ See also "Address Dependency" and "Data Dependency".
Cycle: Memory-barrier pairing is restricted to a pair of CPUs, as the
name suggests. And in a great many cases, a pair of CPUs is all
@@ -72,10 +72,10 @@ Cycle: Memory-barrier pairing is restricted to a pair of CPUs, as the
extended to additional CPUs, and the result is called a "cycle".
In a cycle, each CPU's ordering interacts with that of the next:
- CPU 0 CPU 1 CPU 2
- WRITE_ONCE(x, 1); WRITE_ONCE(y, 1); WRITE_ONCE(z, 1);
- smp_mb(); smp_mb(); smp_mb();
- r0 = READ_ONCE(y); r1 = READ_ONCE(z); r2 = READ_ONCE(x);
+ CPU 0 CPU 1 CPU 2
+ WRITE_ONCE(x, 1); WRITE_ONCE(y, 1); WRITE_ONCE(z, 1);
+ smp_mb(); smp_mb(); smp_mb();
+ r0 = READ_ONCE(y); r1 = READ_ONCE(z); r2 = READ_ONCE(x);
CPU 0's smp_mb() interacts with that of CPU 1, which interacts
with that of CPU 2, which in turn interacts with that of CPU 0
diff --git a/tools/memory-model/Documentation/herd-representation.txt b/tools/memory-model/Documentation/herd-representation.txt
index ed988906f2b7..4e19b4f2a476 100644
--- a/tools/memory-model/Documentation/herd-representation.txt
+++ b/tools/memory-model/Documentation/herd-representation.txt
@@ -18,6 +18,11 @@
#
# By convention, a blank line in a cell means "same as the preceding line".
#
+# Note that the syntactic representation does not always match the sets and
+# relations in linux-kernel.cat, due to redefinitions in linux-kernel.bell and
+# lock.cat. For example, the po link between LKR and LKW is upgraded to an rmw
+# link, and W[ACQUIRE] are not included in the Acquire set.
+#
# Disclaimer. The table includes representations of "add" and "and" operations;
# corresponding/identical representations of "sub", "inc", "dec" and "or", "xor",
# "andnot" operations are omitted.
@@ -27,16 +32,16 @@
------------------------------------------------------------------------------
| Non-RMW ops | |
------------------------------------------------------------------------------
- | READ_ONCE | R[once] |
+ | READ_ONCE | R[ONCE] |
| atomic_read | |
- | WRITE_ONCE | W[once] |
+ | WRITE_ONCE | W[ONCE] |
| atomic_set | |
- | smp_load_acquire | R[acquire] |
+ | smp_load_acquire | R[ACQUIRE] |
| atomic_read_acquire | |
- | smp_store_release | W[release] |
+ | smp_store_release | W[RELEASE] |
| atomic_set_release | |
- | smp_store_mb | W[once] ->po F[mb] |
- | smp_mb | F[mb] |
+ | smp_store_mb | W[ONCE] ->po F[MB] |
+ | smp_mb | F[MB] |
| smp_rmb | F[rmb] |
| smp_wmb | F[wmb] |
| smp_mb__before_atomic | F[before-atomic] |
@@ -49,8 +54,8 @@
| rcu_read_lock | F[rcu-lock] |
| rcu_read_unlock | F[rcu-unlock] |
| synchronize_rcu | F[sync-rcu] |
- | rcu_dereference | R[once] |
- | rcu_assign_pointer | W[release] |
+ | rcu_dereference | R[ONCE] |
+ | rcu_assign_pointer | W[RELEASE] |
| srcu_read_lock | R[srcu-lock] |
| srcu_down_read | |
| srcu_read_unlock | W[srcu-unlock] |
@@ -60,32 +65,31 @@
------------------------------------------------------------------------------
| RMW ops w/o return value | |
------------------------------------------------------------------------------
- | atomic_add | R*[noreturn] ->rmw W*[once] |
+ | atomic_add | R*[NORETURN] ->rmw W*[NORETURN] |
| atomic_and | |
| spin_lock | LKR ->po LKW |
------------------------------------------------------------------------------
| RMW ops w/ return value | |
------------------------------------------------------------------------------
- | atomic_add_return | F[mb] ->po R*[once] |
- | | ->rmw W*[once] ->po F[mb] |
+ | atomic_add_return | R*[MB] ->rmw W*[MB] |
| atomic_fetch_add | |
| atomic_fetch_and | |
| atomic_xchg | |
| xchg | |
| atomic_add_negative | |
- | atomic_add_return_relaxed | R*[once] ->rmw W*[once] |
+ | atomic_add_return_relaxed | R*[ONCE] ->rmw W*[ONCE] |
| atomic_fetch_add_relaxed | |
| atomic_fetch_and_relaxed | |
| atomic_xchg_relaxed | |
| xchg_relaxed | |
| atomic_add_negative_relaxed | |
- | atomic_add_return_acquire | R*[acquire] ->rmw W*[once] |
+ | atomic_add_return_acquire | R*[ACQUIRE] ->rmw W*[ACQUIRE] |
| atomic_fetch_add_acquire | |
| atomic_fetch_and_acquire | |
| atomic_xchg_acquire | |
| xchg_acquire | |
| atomic_add_negative_acquire | |
- | atomic_add_return_release | R*[once] ->rmw W*[release] |
+ | atomic_add_return_release | R*[RELEASE] ->rmw W*[RELEASE] |
| atomic_fetch_add_release | |
| atomic_fetch_and_release | |
| atomic_xchg_release | |
@@ -94,17 +98,16 @@
------------------------------------------------------------------------------
| Conditional RMW ops | |
------------------------------------------------------------------------------
- | atomic_cmpxchg | On success: F[mb] ->po R*[once] |
- | | ->rmw W*[once] ->po F[mb] |
- | | On failure: R*[once] |
+ | atomic_cmpxchg | On success: R*[MB] ->rmw W*[MB] |
+ | | On failure: R*[MB] |
| cmpxchg | |
| atomic_add_unless | |
- | atomic_cmpxchg_relaxed | On success: R*[once] ->rmw W*[once] |
- | | On failure: R*[once] |
- | atomic_cmpxchg_acquire | On success: R*[acquire] ->rmw W*[once] |
- | | On failure: R*[once] |
- | atomic_cmpxchg_release | On success: R*[once] ->rmw W*[release] |
- | | On failure: R*[once] |
+ | atomic_cmpxchg_relaxed | On success: R*[ONCE] ->rmw W*[ONCE] |
+ | | On failure: R*[ONCE] |
+ | atomic_cmpxchg_acquire | On success: R*[ACQUIRE] ->rmw W*[ACQUIRE] |
+ | | On failure: R*[ACQUIRE] |
+ | atomic_cmpxchg_release | On success: R*[RELEASE] ->rmw W*[RELEASE] |
+ | | On failure: R*[RELEASE] |
| spin_trylock | On success: LKR ->po LKW |
| | On failure: LF |
------------------------------------------------------------------------------
diff --git a/tools/memory-model/README b/tools/memory-model/README
index dab38904206a..64c860863aa9 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -20,7 +20,7 @@ that litmus test to be exercised within the Linux kernel.
REQUIREMENTS
============
-Version 7.52 or higher of the "herd7" and "klitmus7" tools must be
+Version 7.58 or higher of the "herd7" and "klitmus7" tools must be
downloaded separately:
https://github.com/herd/herdtools7
@@ -79,7 +79,7 @@ Several thousand more example litmus tests are available here:
https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd
https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus
-Documentation describing litmus tests and now to use them may be found
+Documentation describing litmus tests and how to use them may be found
here:
tools/memory-model/Documentation/litmus-tests.txt
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
index ce068700939c..fe65998002b9 100644
--- a/tools/memory-model/linux-kernel.bell
+++ b/tools/memory-model/linux-kernel.bell
@@ -13,17 +13,18 @@
"Linux-kernel memory consistency model"
-enum Accesses = 'once (*READ_ONCE,WRITE_ONCE*) ||
- 'release (*smp_store_release*) ||
- 'acquire (*smp_load_acquire*) ||
- 'noreturn (* R of non-return RMW *)
-instructions R[{'once,'acquire,'noreturn}]
-instructions W[{'once,'release}]
-instructions RMW[{'once,'acquire,'release}]
+enum Accesses = 'ONCE (*READ_ONCE,WRITE_ONCE*) ||
+ 'RELEASE (*smp_store_release*) ||
+ 'ACQUIRE (*smp_load_acquire*) ||
+ 'NORETURN (* R of non-return RMW *) ||
+ 'MB (*xchg(),cmpxchg(),...*)
+instructions R[Accesses]
+instructions W[Accesses]
+instructions RMW[Accesses]
enum Barriers = 'wmb (*smp_wmb*) ||
'rmb (*smp_rmb*) ||
- 'mb (*smp_mb*) ||
+ 'MB (*smp_mb*) ||
'barrier (*barrier*) ||
'rcu-lock (*rcu_read_lock*) ||
'rcu-unlock (*rcu_read_unlock*) ||
@@ -35,6 +36,17 @@ enum Barriers = 'wmb (*smp_wmb*) ||
'after-srcu-read-unlock (*smp_mb__after_srcu_read_unlock*)
instructions F[Barriers]
+
+(*
+ * Filter out syntactic annotations that do not provide the corresponding
+ * semantic ordering, such as Acquire on a store or Mb on a failed RMW.
+ *)
+let FailedRMW = RMW \ (domain(rmw) | range(rmw))
+let Acquire = ACQUIRE \ W \ FailedRMW
+let Release = RELEASE \ R \ FailedRMW
+let Mb = MB \ FailedRMW
+let Noreturn = NORETURN \ W
+
(* SRCU *)
enum SRCU = 'srcu-lock || 'srcu-unlock || 'sync-srcu
instructions SRCU[SRCU]
@@ -73,7 +85,7 @@ flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep
flag ~empty different-values(srcu-rscs) as srcu-bad-value-match
(* Compute marked and plain memory accesses *)
-let Marked = (~M) | IW | Once | Release | Acquire | domain(rmw) | range(rmw) |
+let Marked = (~M) | IW | ONCE | RELEASE | ACQUIRE | MB | RMW |
LKR | LKW | UL | LF | RL | RU | Srcu-lock | Srcu-unlock
let Plain = M \ Marked
@@ -82,3 +94,6 @@ let carry-dep = (data ; [~ Srcu-unlock] ; rfi)*
let addr = carry-dep ; addr
let ctrl = carry-dep ; ctrl
let data = carry-dep ; data
+
+flag ~empty (if "lkmmv2" then 0 else _)
+ as this-model-requires-variant-higher-than-lkmmv1
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index adf3c4f41229..d7e7bf13c831 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -34,6 +34,16 @@ let R4rmb = R \ Noreturn (* Reads for which rmb works *)
let rmb = [R4rmb] ; fencerel(Rmb) ; [R4rmb]
let wmb = [W] ; fencerel(Wmb) ; [W]
let mb = ([M] ; fencerel(Mb) ; [M]) |
+ (*
+ * full-barrier RMWs (successful cmpxchg(), xchg(), etc.) act as
+ * though there were enclosed by smp_mb().
+ * The effect of these virtual smp_mb() is formalized by adding
+ * Mb tags to the read and write of the operation, and providing
+ * the same ordering as though there were additional po edges
+ * between the Mb tag and the read resp. write.
+ *)
+ ([M] ; po ; [Mb & R]) |
+ ([Mb & W] ; po ; [M]) |
([M] ; fencerel(Before-atomic) ; [RMW] ; po? ; [M]) |
([M] ; po? ; [RMW] ; fencerel(After-atomic) ; [M]) |
([M] ; po? ; [LKW] ; fencerel(After-spinlock) ; [M]) |
diff --git a/tools/memory-model/linux-kernel.cfg b/tools/memory-model/linux-kernel.cfg
index 3c8098e99f41..69b04f3aad73 100644
--- a/tools/memory-model/linux-kernel.cfg
+++ b/tools/memory-model/linux-kernel.cfg
@@ -1,6 +1,7 @@
macros linux-kernel.def
bell linux-kernel.bell
model linux-kernel.cat
+variant lkmmv2
graph columns
squished true
showevents noregs
diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def
index 88a39601f525..49e402782e49 100644
--- a/tools/memory-model/linux-kernel.def
+++ b/tools/memory-model/linux-kernel.def
@@ -6,18 +6,18 @@
// which appeared in ASPLOS 2018.
// ONCE
-READ_ONCE(X) __load{once}(X)
-WRITE_ONCE(X,V) { __store{once}(X,V); }
+READ_ONCE(X) __load{ONCE}(X)
+WRITE_ONCE(X,V) { __store{ONCE}(X,V); }
// Release Acquire and friends
-smp_store_release(X,V) { __store{release}(*X,V); }
-smp_load_acquire(X) __load{acquire}(*X)
-rcu_assign_pointer(X,V) { __store{release}(X,V); }
-rcu_dereference(X) __load{once}(X)
-smp_store_mb(X,V) { __store{once}(X,V); __fence{mb}; }
+smp_store_release(X,V) { __store{RELEASE}(*X,V); }
+smp_load_acquire(X) __load{ACQUIRE}(*X)
+rcu_assign_pointer(X,V) { __store{RELEASE}(X,V); }
+rcu_dereference(X) __load{ONCE}(X)
+smp_store_mb(X,V) { __store{ONCE}(X,V); __fence{MB}; }
// Fences
-smp_mb() { __fence{mb}; }
+smp_mb() { __fence{MB}; }
smp_rmb() { __fence{rmb}; }
smp_wmb() { __fence{wmb}; }
smp_mb__before_atomic() { __fence{before-atomic}; }
@@ -28,14 +28,14 @@ smp_mb__after_srcu_read_unlock() { __fence{after-srcu-read-unlock}; }
barrier() { __fence{barrier}; }
// Exchange
-xchg(X,V) __xchg{mb}(X,V)
-xchg_relaxed(X,V) __xchg{once}(X,V)
-xchg_release(X,V) __xchg{release}(X,V)
-xchg_acquire(X,V) __xchg{acquire}(X,V)
-cmpxchg(X,V,W) __cmpxchg{mb}(X,V,W)
-cmpxchg_relaxed(X,V,W) __cmpxchg{once}(X,V,W)
-cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W)
-cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W)
+xchg(X,V) __xchg{MB}(X,V)
+xchg_relaxed(X,V) __xchg{ONCE}(X,V)
+xchg_release(X,V) __xchg{RELEASE}(X,V)
+xchg_acquire(X,V) __xchg{ACQUIRE}(X,V)
+cmpxchg(X,V,W) __cmpxchg{MB}(X,V,W)
+cmpxchg_relaxed(X,V,W) __cmpxchg{ONCE}(X,V,W)
+cmpxchg_acquire(X,V,W) __cmpxchg{ACQUIRE}(X,V,W)
+cmpxchg_release(X,V,W) __cmpxchg{RELEASE}(X,V,W)
// Spinlocks
spin_lock(X) { __lock(X); }
@@ -63,57 +63,86 @@ atomic_set(X,V) { WRITE_ONCE(*X,V); }
atomic_read_acquire(X) smp_load_acquire(X)
atomic_set_release(X,V) { smp_store_release(X,V); }
-atomic_add(V,X) { __atomic_op(X,+,V); }
-atomic_sub(V,X) { __atomic_op(X,-,V); }
-atomic_inc(X) { __atomic_op(X,+,1); }
-atomic_dec(X) { __atomic_op(X,-,1); }
-
-atomic_add_return(V,X) __atomic_op_return{mb}(X,+,V)
-atomic_add_return_relaxed(V,X) __atomic_op_return{once}(X,+,V)
-atomic_add_return_acquire(V,X) __atomic_op_return{acquire}(X,+,V)
-atomic_add_return_release(V,X) __atomic_op_return{release}(X,+,V)
-atomic_fetch_add(V,X) __atomic_fetch_op{mb}(X,+,V)
-atomic_fetch_add_relaxed(V,X) __atomic_fetch_op{once}(X,+,V)
-atomic_fetch_add_acquire(V,X) __atomic_fetch_op{acquire}(X,+,V)
-atomic_fetch_add_release(V,X) __atomic_fetch_op{release}(X,+,V)
-
-atomic_inc_return(X) __atomic_op_return{mb}(X,+,1)
-atomic_inc_return_relaxed(X) __atomic_op_return{once}(X,+,1)
-atomic_inc_return_acquire(X) __atomic_op_return{acquire}(X,+,1)
-atomic_inc_return_release(X) __atomic_op_return{release}(X,+,1)
-atomic_fetch_inc(X) __atomic_fetch_op{mb}(X,+,1)
-atomic_fetch_inc_relaxed(X) __atomic_fetch_op{once}(X,+,1)
-atomic_fetch_inc_acquire(X) __atomic_fetch_op{acquire}(X,+,1)
-atomic_fetch_inc_release(X) __atomic_fetch_op{release}(X,+,1)
-
-atomic_sub_return(V,X) __atomic_op_return{mb}(X,-,V)
-atomic_sub_return_relaxed(V,X) __atomic_op_return{once}(X,-,V)
-atomic_sub_return_acquire(V,X) __atomic_op_return{acquire}(X,-,V)
-atomic_sub_return_release(V,X) __atomic_op_return{release}(X,-,V)
-atomic_fetch_sub(V,X) __atomic_fetch_op{mb}(X,-,V)
-atomic_fetch_sub_relaxed(V,X) __atomic_fetch_op{once}(X,-,V)
-atomic_fetch_sub_acquire(V,X) __atomic_fetch_op{acquire}(X,-,V)
-atomic_fetch_sub_release(V,X) __atomic_fetch_op{release}(X,-,V)
-
-atomic_dec_return(X) __atomic_op_return{mb}(X,-,1)
-atomic_dec_return_relaxed(X) __atomic_op_return{once}(X,-,1)
-atomic_dec_return_acquire(X) __atomic_op_return{acquire}(X,-,1)
-atomic_dec_return_release(X) __atomic_op_return{release}(X,-,1)
-atomic_fetch_dec(X) __atomic_fetch_op{mb}(X,-,1)
-atomic_fetch_dec_relaxed(X) __atomic_fetch_op{once}(X,-,1)
-atomic_fetch_dec_acquire(X) __atomic_fetch_op{acquire}(X,-,1)
-atomic_fetch_dec_release(X) __atomic_fetch_op{release}(X,-,1)
-
-atomic_xchg(X,V) __xchg{mb}(X,V)
-atomic_xchg_relaxed(X,V) __xchg{once}(X,V)
-atomic_xchg_release(X,V) __xchg{release}(X,V)
-atomic_xchg_acquire(X,V) __xchg{acquire}(X,V)
-atomic_cmpxchg(X,V,W) __cmpxchg{mb}(X,V,W)
-atomic_cmpxchg_relaxed(X,V,W) __cmpxchg{once}(X,V,W)
-atomic_cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W)
-atomic_cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W)
-
-atomic_sub_and_test(V,X) __atomic_op_return{mb}(X,-,V) == 0
-atomic_dec_and_test(X) __atomic_op_return{mb}(X,-,1) == 0
-atomic_inc_and_test(X) __atomic_op_return{mb}(X,+,1) == 0
-atomic_add_negative(V,X) __atomic_op_return{mb}(X,+,V) < 0
+atomic_add(V,X) { __atomic_op{NORETURN}(X,+,V); }
+atomic_sub(V,X) { __atomic_op{NORETURN}(X,-,V); }
+atomic_and(V,X) { __atomic_op{NORETURN}(X,&,V); }
+atomic_or(V,X) { __atomic_op{NORETURN}(X,|,V); }
+atomic_xor(V,X) { __atomic_op{NORETURN}(X,^,V); }
+atomic_inc(X) { __atomic_op{NORETURN}(X,+,1); }
+atomic_dec(X) { __atomic_op{NORETURN}(X,-,1); }
+atomic_andnot(V,X) { __atomic_op{NORETURN}(X,&~,V); }
+
+atomic_add_return(V,X) __atomic_op_return{MB}(X,+,V)
+atomic_add_return_relaxed(V,X) __atomic_op_return{ONCE}(X,+,V)
+atomic_add_return_acquire(V,X) __atomic_op_return{ACQUIRE}(X,+,V)
+atomic_add_return_release(V,X) __atomic_op_return{RELEASE}(X,+,V)
+atomic_fetch_add(V,X) __atomic_fetch_op{MB}(X,+,V)
+atomic_fetch_add_relaxed(V,X) __atomic_fetch_op{ONCE}(X,+,V)
+atomic_fetch_add_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,+,V)
+atomic_fetch_add_release(V,X) __atomic_fetch_op{RELEASE}(X,+,V)
+
+atomic_fetch_and(V,X) __atomic_fetch_op{MB}(X,&,V)
+atomic_fetch_and_relaxed(V,X) __atomic_fetch_op{ONCE}(X,&,V)
+atomic_fetch_and_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,&,V)
+atomic_fetch_and_release(V,X) __atomic_fetch_op{RELEASE}(X,&,V)
+
+atomic_fetch_or(V,X) __atomic_fetch_op{MB}(X,|,V)
+atomic_fetch_or_relaxed(V,X) __atomic_fetch_op{ONCE}(X,|,V)
+atomic_fetch_or_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,|,V)
+atomic_fetch_or_release(V,X) __atomic_fetch_op{RELEASE}(X,|,V)
+
+atomic_fetch_xor(V,X) __atomic_fetch_op{MB}(X,^,V)
+atomic_fetch_xor_relaxed(V,X) __atomic_fetch_op{ONCE}(X,^,V)
+atomic_fetch_xor_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,^,V)
+atomic_fetch_xor_release(V,X) __atomic_fetch_op{RELEASE}(X,^,V)
+
+atomic_inc_return(X) __atomic_op_return{MB}(X,+,1)
+atomic_inc_return_relaxed(X) __atomic_op_return{ONCE}(X,+,1)
+atomic_inc_return_acquire(X) __atomic_op_return{ACQUIRE}(X,+,1)
+atomic_inc_return_release(X) __atomic_op_return{RELEASE}(X,+,1)
+atomic_fetch_inc(X) __atomic_fetch_op{MB}(X,+,1)
+atomic_fetch_inc_relaxed(X) __atomic_fetch_op{ONCE}(X,+,1)
+atomic_fetch_inc_acquire(X) __atomic_fetch_op{ACQUIRE}(X,+,1)
+atomic_fetch_inc_release(X) __atomic_fetch_op{RELEASE}(X,+,1)
+
+atomic_sub_return(V,X) __atomic_op_return{MB}(X,-,V)
+atomic_sub_return_relaxed(V,X) __atomic_op_return{ONCE}(X,-,V)
+atomic_sub_return_acquire(V,X) __atomic_op_return{ACQUIRE}(X,-,V)
+atomic_sub_return_release(V,X) __atomic_op_return{RELEASE}(X,-,V)
+atomic_fetch_sub(V,X) __atomic_fetch_op{MB}(X,-,V)
+atomic_fetch_sub_relaxed(V,X) __atomic_fetch_op{ONCE}(X,-,V)
+atomic_fetch_sub_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,-,V)
+atomic_fetch_sub_release(V,X) __atomic_fetch_op{RELEASE}(X,-,V)
+
+atomic_dec_return(X) __atomic_op_return{MB}(X,-,1)
+atomic_dec_return_relaxed(X) __atomic_op_return{ONCE}(X,-,1)
+atomic_dec_return_acquire(X) __atomic_op_return{ACQUIRE}(X,-,1)
+atomic_dec_return_release(X) __atomic_op_return{RELEASE}(X,-,1)
+atomic_fetch_dec(X) __atomic_fetch_op{MB}(X,-,1)
+atomic_fetch_dec_relaxed(X) __atomic_fetch_op{ONCE}(X,-,1)
+atomic_fetch_dec_acquire(X) __atomic_fetch_op{ACQUIRE}(X,-,1)
+atomic_fetch_dec_release(X) __atomic_fetch_op{RELEASE}(X,-,1)
+
+atomic_xchg(X,V) __xchg{MB}(X,V)
+atomic_xchg_relaxed(X,V) __xchg{ONCE}(X,V)
+atomic_xchg_release(X,V) __xchg{RELEASE}(X,V)
+atomic_xchg_acquire(X,V) __xchg{ACQUIRE}(X,V)
+atomic_cmpxchg(X,V,W) __cmpxchg{MB}(X,V,W)
+atomic_cmpxchg_relaxed(X,V,W) __cmpxchg{ONCE}(X,V,W)
+atomic_cmpxchg_acquire(X,V,W) __cmpxchg{ACQUIRE}(X,V,W)
+atomic_cmpxchg_release(X,V,W) __cmpxchg{RELEASE}(X,V,W)
+
+atomic_sub_and_test(V,X) __atomic_op_return{MB}(X,-,V) == 0
+atomic_dec_and_test(X) __atomic_op_return{MB}(X,-,1) == 0
+atomic_inc_and_test(X) __atomic_op_return{MB}(X,+,1) == 0
+atomic_add_negative(V,X) __atomic_op_return{MB}(X,+,V) < 0
+atomic_add_negative_relaxed(V,X) __atomic_op_return{ONCE}(X,+,V) < 0
+atomic_add_negative_acquire(V,X) __atomic_op_return{ACQUIRE}(X,+,V) < 0
+atomic_add_negative_release(V,X) __atomic_op_return{RELEASE}(X,+,V) < 0
+
+atomic_fetch_andnot(V,X) __atomic_fetch_op{MB}(X,&~,V)
+atomic_fetch_andnot_acquire(V,X) __atomic_fetch_op{ACQUIRE}(X,&~,V)
+atomic_fetch_andnot_release(V,X) __atomic_fetch_op{RELEASE}(X,&~,V)
+atomic_fetch_andnot_relaxed(V,X) __atomic_fetch_op{ONCE}(X,&~,V)
+
+atomic_add_unless(X,V,W) __atomic_add_unless{MB}(X,V,W)
diff --git a/tools/mm/page-types.c b/tools/mm/page-types.c
index bcac7ebfb51f..d7e5e8902af8 100644
--- a/tools/mm/page-types.c
+++ b/tools/mm/page-types.c
@@ -24,8 +24,8 @@
#include <signal.h>
#include <inttypes.h>
#include <sys/types.h>
-#include <sys/errno.h>
-#include <sys/fcntl.h>
+#include <errno.h>
+#include <fcntl.h>
#include <sys/mount.h>
#include <sys/statfs.h>
#include <sys/mman.h>
diff --git a/tools/objtool/Documentation/objtool.txt b/tools/objtool/Documentation/objtool.txt
index 7c3ee959b63c..28ac57b9e102 100644
--- a/tools/objtool/Documentation/objtool.txt
+++ b/tools/objtool/Documentation/objtool.txt
@@ -28,6 +28,15 @@ Objtool has the following features:
sites, enabling the kernel to patch them inline, to prevent "thunk
funneling" for both security and performance reasons
+- Return thunk validation -- validates return thunks are used for
+ certain CPU mitigations including Retbleed and SRSO
+
+- Return thunk annotation -- annotates all return thunk sites so kernel
+ can patch them inline, depending on enabled mitigations
+
+- Return thunk training valiation -- validate that all entry paths
+ untrain a "safe return" before the first return (or call)
+
- Non-instrumentation validation -- validates non-instrumentable
("noinstr") code rules, preventing instrumentation in low-level C
entry code
@@ -53,6 +62,9 @@ Objtool has the following features:
- Function entry annotation -- annotates function entries, enabling
kernel function tracing
+- Function preamble (prefix) annotation and/or symbol generation -- used
+ for FineIBT and call depth tracking
+
- Other toolchain hacks which will go unmentioned at this time...
Each feature can be enabled individually or in combination using the
@@ -197,19 +209,17 @@ To achieve the validation, objtool enforces the following rules:
1. Each callable function must be annotated as such with the ELF
function type. In asm code, this is typically done using the
- ENTRY/ENDPROC macros. If objtool finds a return instruction
+ SYM_FUNC_{START,END} macros. If objtool finds a return instruction
outside of a function, it flags an error since that usually indicates
callable code which should be annotated accordingly.
This rule is needed so that objtool can properly identify each
callable function in order to analyze its stack metadata.
-2. Conversely, each section of code which is *not* callable should *not*
- be annotated as an ELF function. The ENDPROC macro shouldn't be used
- in this case.
-
- This rule is needed so that objtool can ignore non-callable code.
- Such code doesn't have to follow any of the other rules.
+2. Conversely, each section of code which is *not* callable, or is
+ otherwise doing funny things with the stack or registers, should
+ *not* be annotated as an ELF function. Rather, SYM_CODE_{START,END}
+ should be used along with unwind hints.
3. Each callable function which calls another function must have the
correct frame pointer logic, if required by CONFIG_FRAME_POINTER or
@@ -221,7 +231,7 @@ To achieve the validation, objtool enforces the following rules:
function B, the _caller_ of function A will be skipped on the stack
trace.
-4. Dynamic jumps and jumps to undefined symbols are only allowed if:
+4. Indirect jumps and jumps to undefined symbols are only allowed if:
a) the jump is part of a switch statement; or
@@ -304,29 +314,29 @@ the objtool maintainers.
001e 2823e: 80 ce 02 or $0x2,%dh
...
+
2. file.o: warning: objtool: .text+0x53: unreachable instruction
Objtool couldn't find a code path to reach the instruction.
If the error is for an asm file, and the instruction is inside (or
reachable from) a callable function, the function should be annotated
- with the ENTRY/ENDPROC macros (ENDPROC is the important one).
- Otherwise, the code should probably be annotated with the unwind hint
- macros in asm/unwind_hints.h so objtool and the unwinder can know the
- stack state associated with the code.
+ with the SYM_FUNC_START and SYM_FUNC_END macros.
- If you're 100% sure the code won't affect stack traces, or if you're
- a just a bad person, you can tell objtool to ignore it. See the
- "Adding exceptions" section below.
+ Otherwise, SYM_CODE_START can be used. In that case the code needs
+ to be annotated with unwind hint macros.
- If it's not actually in a callable function (e.g. kernel entry code),
- change ENDPROC to END.
+ If you're sure the code won't affect the reliability of runtime stack
+ traces and want objtool to ignore it, see "Adding exceptions" below.
-3. file.o: warning: objtool: foo+0x48c: bar() is missing a __noreturn annotation
- The call from foo() to bar() doesn't return, but bar() is missing the
- __noreturn annotation. NOTE: In addition to annotating the function
- with __noreturn, please also add it to tools/objtool/noreturns.h.
+3. file.o: warning: objtool: foo+0x48c: bar() missing __noreturn in .c/.h or NORETURN() in noreturns.h
+
+ The call from foo() to bar() doesn't return, but bar() is incorrectly
+ annotated. A noreturn function must be marked __noreturn in both its
+ declaration and its definition, and must have a NORETURN() annotation
+ in tools/objtool/noreturns.h.
+
4. file.o: warning: objtool: func(): can't find starting instruction
or
@@ -341,23 +351,21 @@ the objtool maintainers.
This is a kernel entry/exit instruction like sysenter or iret. Such
instructions aren't allowed in a callable function, and are most
- likely part of the kernel entry code. They should usually not have
- the callable function annotation (ENDPROC) and should always be
- annotated with the unwind hint macros in asm/unwind_hints.h.
+ likely part of the kernel entry code. Such code should probably be
+ placed in a SYM_FUNC_CODE block with unwind hints.
6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame
- This is a dynamic jump or a jump to an undefined symbol. Objtool
- assumed it's a sibling call and detected that the frame pointer
- wasn't first restored to its original state.
+ This is a branch to an UNDEF symbol. Objtool assumed it's a
+ sibling call and detected that the stack wasn't first restored to its
+ original state.
- If it's not really a sibling call, you may need to move the
- destination code to the local file.
+ If it's not really a sibling call, you may need to use unwind hints
+ and/or move the destination code to the local file.
If the instruction is not actually in a callable function (e.g.
- kernel entry code), change ENDPROC to END and annotate manually with
- the unwind hint macros in asm/unwind_hints.h.
+ kernel entry code), use SYM_CODE_{START,END} and unwind hints.
7. file: warning: objtool: func()+0x5c: stack state mismatch
@@ -373,8 +381,8 @@ the objtool maintainers.
Another possibility is that the code has some asm or inline asm which
does some unusual things to the stack or the frame pointer. In such
- cases it's probably appropriate to use the unwind hint macros in
- asm/unwind_hints.h.
+ cases it's probably appropriate to use SYM_FUNC_CODE with unwind
+ hints.
8. file.o: warning: objtool: funcA() falls through to next function funcB()
@@ -384,17 +392,16 @@ the objtool maintainers.
can fall through into the next function. There could be different
reasons for this:
- 1) funcA()'s last instruction is a call to a "noreturn" function like
+ a) funcA()'s last instruction is a call to a "noreturn" function like
panic(). In this case the noreturn function needs to be added to
objtool's hard-coded global_noreturns array. Feel free to bug the
objtool maintainer, or you can submit a patch.
- 2) funcA() uses the unreachable() annotation in a section of code
+ b) funcA() uses the unreachable() annotation in a section of code
that is actually reachable.
- 3) If funcA() calls an inline function, the object code for funcA()
- might be corrupt due to a gcc bug. For more details, see:
- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
+ c) Some undefined behavior like divide by zero.
+
9. file.o: warning: objtool: funcA() call to funcB() with UACCESS enabled
@@ -432,24 +439,26 @@ the objtool maintainers.
This limitation can be overcome by massaging the alternatives with
NOPs to shift the stack changes around so they no longer conflict.
+
11. file.o: warning: unannotated intra-function call
- This warning means that a direct call is done to a destination which
- is not at the beginning of a function. If this is a legit call, you
- can remove this warning by putting the ANNOTATE_INTRA_FUNCTION_CALL
- directive right before the call.
+ This warning means that a direct call is done to a destination which
+ is not at the beginning of a function. If this is a legit call, you
+ can remove this warning by putting the ANNOTATE_INTRA_FUNCTION_CALL
+ directive right before the call.
+
12. file.o: warning: func(): not an indirect call target
- This means that objtool is running with --ibt and a function expected
- to be an indirect call target is not. In particular, this happens for
- init_module() or cleanup_module() if a module relies on these special
- names and does not use module_init() / module_exit() macros to create
- them.
+ This means that objtool is running with --ibt and a function
+ expected to be an indirect call target is not. In particular, this
+ happens for init_module() or cleanup_module() if a module relies on
+ these special names and does not use module_init() / module_exit()
+ macros to create them.
If the error doesn't seem to make sense, it could be a bug in objtool.
-Feel free to ask the objtool maintainer for help.
+Feel free to ask objtool maintainers for help.
Adding exceptions
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index f56e27727534..8c20361dd100 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -37,7 +37,7 @@ OBJTOOL_CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBE
OBJTOOL_LDFLAGS := $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
# Allow old libelf to be used:
-elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(HOSTCC) $(OBJTOOL_CFLAGS) -x c -E - | grep elf_getshdr)
+elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(HOSTCC) $(OBJTOOL_CFLAGS) -x c -E - 2>/dev/null | grep elf_getshdr)
OBJTOOL_CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
# Always want host compilation.
@@ -46,12 +46,6 @@ HOST_OVERRIDES := CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)"
AWK = awk
MKDIR = mkdir
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
BUILD_ORC := n
ifeq ($(SRCARCH),x86)
diff --git a/tools/objtool/arch/loongarch/decode.c b/tools/objtool/arch/loongarch/decode.c
index 69b66994f2a1..02e490555966 100644
--- a/tools/objtool/arch/loongarch/decode.c
+++ b/tools/objtool/arch/loongarch/decode.c
@@ -5,10 +5,7 @@
#include <asm/inst.h>
#include <asm/orc_types.h>
#include <linux/objtool_types.h>
-
-#ifndef EM_LOONGARCH
-#define EM_LOONGARCH 258
-#endif
+#include <arch/elf.h>
int arch_ftrace_match(char *name)
{
@@ -363,3 +360,26 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state)
state->cfa.base = CFI_SP;
state->cfa.offset = 0;
}
+
+unsigned int arch_reloc_size(struct reloc *reloc)
+{
+ switch (reloc_type(reloc)) {
+ case R_LARCH_32:
+ case R_LARCH_32_PCREL:
+ return 4;
+ default:
+ return 8;
+ }
+}
+
+unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table)
+{
+ switch (reloc_type(reloc)) {
+ case R_LARCH_32_PCREL:
+ case R_LARCH_64_PCREL:
+ return reloc->sym->offset + reloc_addend(reloc) -
+ (reloc_offset(reloc) - reloc_offset(table));
+ default:
+ return reloc->sym->offset + reloc_addend(reloc);
+ }
+}
diff --git a/tools/objtool/arch/loongarch/include/arch/elf.h b/tools/objtool/arch/loongarch/include/arch/elf.h
index 9623d663220e..ec79062c9554 100644
--- a/tools/objtool/arch/loongarch/include/arch/elf.h
+++ b/tools/objtool/arch/loongarch/include/arch/elf.h
@@ -18,6 +18,13 @@
#ifndef R_LARCH_32_PCREL
#define R_LARCH_32_PCREL 99
#endif
+#ifndef R_LARCH_64_PCREL
+#define R_LARCH_64_PCREL 109
+#endif
+
+#ifndef EM_LOONGARCH
+#define EM_LOONGARCH 258
+#endif
#define R_NONE R_LARCH_NONE
#define R_ABS32 R_LARCH_32
diff --git a/tools/objtool/arch/loongarch/special.c b/tools/objtool/arch/loongarch/special.c
index 87230ed570fd..e39f86d97002 100644
--- a/tools/objtool/arch/loongarch/special.c
+++ b/tools/objtool/arch/loongarch/special.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
+#include <string.h>
#include <objtool/special.h>
+#include <objtool/warn.h>
bool arch_support_alt_relocation(struct special_alt *special_alt,
struct instruction *insn,
@@ -8,9 +10,164 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
return false;
}
+struct table_info {
+ struct list_head jump_info;
+ unsigned long insn_offset;
+ unsigned long rodata_offset;
+};
+
+static void get_rodata_table_size_by_table_annotate(struct objtool_file *file,
+ struct instruction *insn,
+ unsigned long *table_size)
+{
+ struct section *rsec;
+ struct reloc *reloc;
+ struct list_head table_list;
+ struct table_info *orig_table;
+ struct table_info *next_table;
+ unsigned long tmp_insn_offset;
+ unsigned long tmp_rodata_offset;
+
+ rsec = find_section_by_name(file->elf, ".rela.discard.tablejump_annotate");
+ if (!rsec)
+ return;
+
+ INIT_LIST_HEAD(&table_list);
+
+ for_each_reloc(rsec, reloc) {
+ orig_table = malloc(sizeof(struct table_info));
+ if (!orig_table) {
+ WARN("malloc failed");
+ return;
+ }
+
+ orig_table->insn_offset = reloc->sym->offset + reloc_addend(reloc);
+ reloc++;
+ orig_table->rodata_offset = reloc->sym->offset + reloc_addend(reloc);
+
+ list_add_tail(&orig_table->jump_info, &table_list);
+
+ if (reloc_idx(reloc) + 1 == sec_num_entries(rsec))
+ break;
+ }
+
+ list_for_each_entry(orig_table, &table_list, jump_info) {
+ next_table = list_next_entry(orig_table, jump_info);
+ list_for_each_entry_from(next_table, &table_list, jump_info) {
+ if (next_table->rodata_offset < orig_table->rodata_offset) {
+ tmp_insn_offset = next_table->insn_offset;
+ tmp_rodata_offset = next_table->rodata_offset;
+ next_table->insn_offset = orig_table->insn_offset;
+ next_table->rodata_offset = orig_table->rodata_offset;
+ orig_table->insn_offset = tmp_insn_offset;
+ orig_table->rodata_offset = tmp_rodata_offset;
+ }
+ }
+ }
+
+ list_for_each_entry(orig_table, &table_list, jump_info) {
+ if (insn->offset == orig_table->insn_offset) {
+ next_table = list_next_entry(orig_table, jump_info);
+ if (&next_table->jump_info == &table_list) {
+ *table_size = 0;
+ return;
+ }
+
+ while (next_table->rodata_offset == orig_table->rodata_offset) {
+ next_table = list_next_entry(next_table, jump_info);
+ if (&next_table->jump_info == &table_list) {
+ *table_size = 0;
+ return;
+ }
+ }
+
+ *table_size = next_table->rodata_offset - orig_table->rodata_offset;
+ }
+ }
+}
+
+static struct reloc *find_reloc_by_table_annotate(struct objtool_file *file,
+ struct instruction *insn,
+ unsigned long *table_size)
+{
+ struct section *rsec;
+ struct reloc *reloc;
+ unsigned long offset;
+
+ rsec = find_section_by_name(file->elf, ".rela.discard.tablejump_annotate");
+ if (!rsec)
+ return NULL;
+
+ for_each_reloc(rsec, reloc) {
+ if (reloc->sym->sec->rodata)
+ continue;
+
+ if (strcmp(insn->sec->name, reloc->sym->sec->name))
+ continue;
+
+ offset = reloc->sym->offset + reloc_addend(reloc);
+ if (insn->offset == offset) {
+ get_rodata_table_size_by_table_annotate(file, insn, table_size);
+ reloc++;
+ return reloc;
+ }
+ }
+
+ return NULL;
+}
+
+static struct reloc *find_reloc_of_rodata_c_jump_table(struct section *sec,
+ unsigned long offset,
+ unsigned long *table_size)
+{
+ struct section *rsec;
+ struct reloc *reloc;
+
+ rsec = sec->rsec;
+ if (!rsec)
+ return NULL;
+
+ for_each_reloc(rsec, reloc) {
+ if (reloc_offset(reloc) > offset)
+ break;
+
+ if (!strcmp(reloc->sym->sec->name, C_JUMP_TABLE_SECTION)) {
+ *table_size = 0;
+ return reloc;
+ }
+ }
+
+ return NULL;
+}
+
struct reloc *arch_find_switch_table(struct objtool_file *file,
struct instruction *insn,
unsigned long *table_size)
{
- return NULL;
+ struct reloc *annotate_reloc;
+ struct reloc *rodata_reloc;
+ struct section *table_sec;
+ unsigned long table_offset;
+
+ annotate_reloc = find_reloc_by_table_annotate(file, insn, table_size);
+ if (!annotate_reloc) {
+ annotate_reloc = find_reloc_of_rodata_c_jump_table(
+ insn->sec, insn->offset, table_size);
+ if (!annotate_reloc)
+ return NULL;
+ }
+
+ table_sec = annotate_reloc->sym->sec;
+ table_offset = annotate_reloc->sym->offset + reloc_addend(annotate_reloc);
+
+ /*
+ * Each table entry has a rela associated with it. The rela
+ * should reference text in the same function as the original
+ * instruction.
+ */
+ rodata_reloc = find_reloc_by_dest(file->elf, table_sec, table_offset);
+ if (!rodata_reloc)
+ return NULL;
+
+ return rodata_reloc;
}
diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c
index 53b55690f320..7c0bf2429067 100644
--- a/tools/objtool/arch/powerpc/decode.c
+++ b/tools/objtool/arch/powerpc/decode.c
@@ -106,3 +106,17 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state)
state->regs[CFI_RA].base = CFI_CFA;
state->regs[CFI_RA].offset = 0;
}
+
+unsigned int arch_reloc_size(struct reloc *reloc)
+{
+ switch (reloc_type(reloc)) {
+ case R_PPC_REL32:
+ case R_PPC_ADDR32:
+ case R_PPC_UADDR32:
+ case R_PPC_PLT32:
+ case R_PPC_PLTREL32:
+ return 4;
+ default:
+ return 8;
+ }
+}
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index fe1362c34564..7567c893f45e 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -850,5 +850,19 @@ bool arch_is_rethunk(struct symbol *sym)
bool arch_is_embedded_insn(struct symbol *sym)
{
return !strcmp(sym->name, "retbleed_return_thunk") ||
+ !strcmp(sym->name, "srso_alias_safe_ret") ||
!strcmp(sym->name, "srso_safe_ret");
}
+
+unsigned int arch_reloc_size(struct reloc *reloc)
+{
+ switch (reloc_type(reloc)) {
+ case R_X86_64_32:
+ case R_X86_64_32S:
+ case R_X86_64_PC32:
+ case R_X86_64_PLT32:
+ return 4;
+ default:
+ return 8;
+ }
+}
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 387d56a7f5fb..5f761f420b8c 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -6,6 +6,10 @@
#include <subcmd/parse-options.h>
#include <string.h>
#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/sendfile.h>
#include <objtool/builtin.h>
#include <objtool/objtool.h>
@@ -14,6 +18,8 @@
"error: objtool: " format "\n", \
##__VA_ARGS__)
+const char *objname;
+
struct opts opts;
static const char * const check_usage[] = {
@@ -71,7 +77,7 @@ static const struct option check_options[] = {
OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"),
OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"),
OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
- OPT_BOOLEAN('o', "orc", &opts.orc, "generate ORC metadata"),
+ OPT_BOOLEAN(0, "orc", &opts.orc, "generate ORC metadata"),
OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"),
OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"),
@@ -84,16 +90,17 @@ static const struct option check_options[] = {
OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
OPT_GROUP("Options:"),
- OPT_BOOLEAN(0, "backtrace", &opts.backtrace, "unwind on error"),
- OPT_BOOLEAN(0, "backup", &opts.backup, "create .orig files before modification"),
- OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"),
- OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"),
- OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"),
- OPT_BOOLEAN(0, "mnop", &opts.mnop, "nop out mcount call sites"),
- OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
- OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"),
- OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"),
+ OPT_BOOLEAN(0, "backtrace", &opts.backtrace, "unwind on error"),
+ OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"),
+ OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"),
+ OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"),
+ OPT_BOOLEAN(0, "mnop", &opts.mnop, "nop out mcount call sites"),
+ OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
+ OPT_STRING('o', "output", &opts.output, "file", "output file name"),
+ OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"),
+ OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"),
OPT_BOOLEAN('v', "verbose", &opts.verbose, "verbose warnings"),
+ OPT_BOOLEAN(0, "Werror", &opts.werror, "return error on warnings"),
OPT_END(),
};
@@ -131,6 +138,26 @@ int cmd_parse_options(int argc, const char **argv, const char * const usage[])
static bool opts_valid(void)
{
+ if (opts.mnop && !opts.mcount) {
+ ERROR("--mnop requires --mcount");
+ return false;
+ }
+
+ if (opts.noinstr && !opts.link) {
+ ERROR("--noinstr requires --link");
+ return false;
+ }
+
+ if (opts.ibt && !opts.link) {
+ ERROR("--ibt requires --link");
+ return false;
+ }
+
+ if (opts.unret && !opts.link) {
+ ERROR("--unret requires --link");
+ return false;
+ }
+
if (opts.hack_jump_label ||
opts.hack_noinstr ||
opts.ibt ||
@@ -151,11 +178,6 @@ static bool opts_valid(void)
return true;
}
- if (opts.unret && !opts.rethunk) {
- ERROR("--unret requires --rethunk");
- return false;
- }
-
if (opts.dump_orc)
return true;
@@ -163,76 +185,156 @@ static bool opts_valid(void)
return false;
}
-static bool mnop_opts_valid(void)
+static int copy_file(const char *src, const char *dst)
{
- if (opts.mnop && !opts.mcount) {
- ERROR("--mnop requires --mcount");
- return false;
+ size_t to_copy, copied;
+ int dst_fd, src_fd;
+ struct stat stat;
+ off_t offset = 0;
+
+ src_fd = open(src, O_RDONLY);
+ if (src_fd == -1) {
+ ERROR("can't open '%s' for reading", src);
+ return 1;
}
- return true;
-}
-
-static bool link_opts_valid(struct objtool_file *file)
-{
- if (opts.link)
- return true;
+ dst_fd = open(dst, O_WRONLY | O_CREAT | O_TRUNC, 0400);
+ if (dst_fd == -1) {
+ ERROR("can't open '%s' for writing", dst);
+ return 1;
+ }
- if (has_multiple_files(file->elf)) {
- ERROR("Linked object detected, forcing --link");
- opts.link = true;
- return true;
+ if (fstat(src_fd, &stat) == -1) {
+ perror("fstat");
+ return 1;
}
- if (opts.noinstr) {
- ERROR("--noinstr requires --link");
- return false;
+ if (fchmod(dst_fd, stat.st_mode) == -1) {
+ perror("fchmod");
+ return 1;
}
- if (opts.ibt) {
- ERROR("--ibt requires --link");
- return false;
+ for (to_copy = stat.st_size; to_copy > 0; to_copy -= copied) {
+ copied = sendfile(dst_fd, src_fd, &offset, to_copy);
+ if (copied == -1) {
+ perror("sendfile");
+ return 1;
+ }
}
- if (opts.unret) {
- ERROR("--unret requires --link");
- return false;
+ close(dst_fd);
+ close(src_fd);
+ return 0;
+}
+
+static char **save_argv(int argc, const char **argv)
+{
+ char **orig_argv;
+
+ orig_argv = calloc(argc, sizeof(char *));
+ if (!orig_argv) {
+ perror("calloc");
+ return NULL;
}
- return true;
+ for (int i = 0; i < argc; i++) {
+ orig_argv[i] = strdup(argv[i]);
+ if (!orig_argv[i]) {
+ perror("strdup");
+ return NULL;
+ }
+ };
+
+ return orig_argv;
}
+#define ORIG_SUFFIX ".orig"
+
int objtool_run(int argc, const char **argv)
{
- const char *objname;
struct objtool_file *file;
- int ret;
+ char *backup = NULL;
+ char **orig_argv;
+ int ret = 0;
- argc = cmd_parse_options(argc, argv, check_usage);
- objname = argv[0];
+ orig_argv = save_argv(argc, argv);
+ if (!orig_argv)
+ return 1;
+
+ cmd_parse_options(argc, argv, check_usage);
if (!opts_valid())
return 1;
+ objname = argv[0];
+
if (opts.dump_orc)
return orc_dump(objname);
+ if (!opts.dryrun && opts.output) {
+ /* copy original .o file to output file */
+ if (copy_file(objname, opts.output))
+ return 1;
+
+ /* from here on, work directly on the output file */
+ objname = opts.output;
+ }
+
file = objtool_open_read(objname);
if (!file)
- return 1;
+ goto err;
- if (!mnop_opts_valid())
- return 1;
-
- if (!link_opts_valid(file))
- return 1;
+ if (!opts.link && has_multiple_files(file->elf)) {
+ ERROR("Linked object requires --link");
+ goto err;
+ }
ret = check(file);
if (ret)
- return ret;
+ goto err;
- if (file->elf->changed)
- return elf_write(file->elf);
+ if (!opts.dryrun && file->elf->changed && elf_write(file->elf))
+ goto err;
return 0;
+
+err:
+ if (opts.dryrun)
+ goto err_msg;
+
+ if (opts.output) {
+ unlink(opts.output);
+ goto err_msg;
+ }
+
+ /*
+ * Make a backup before kbuild deletes the file so the error
+ * can be recreated without recompiling or relinking.
+ */
+ backup = malloc(strlen(objname) + strlen(ORIG_SUFFIX) + 1);
+ if (!backup) {
+ perror("malloc");
+ return 1;
+ }
+
+ strcpy(backup, objname);
+ strcat(backup, ORIG_SUFFIX);
+ if (copy_file(objname, backup))
+ return 1;
+
+err_msg:
+ fprintf(stderr, "%s", orig_argv[0]);
+
+ for (int i = 1; i < argc; i++) {
+ char *arg = orig_argv[i];
+
+ if (backup && !strcmp(arg, objname))
+ fprintf(stderr, " %s -o %s", backup, objname);
+ else
+ fprintf(stderr, " %s", arg);
+ }
+
+ fprintf(stderr, "\n");
+
+ return 1;
}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 753dbc4f8198..ca3435acc326 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -227,6 +227,7 @@ static bool is_rust_noreturn(const struct symbol *func)
str_ends_with(func->name, "_4core9panicking18panic_bounds_check") ||
str_ends_with(func->name, "_4core9panicking19assert_failed_inner") ||
str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") ||
+ strstr(func->name, "_4core9panicking13assert_failed") ||
strstr(func->name, "_4core9panicking11panic_const24panic_const_") ||
(strstr(func->name, "_4core5slice5index24slice_") &&
str_ends_with(func->name, "_fail"));
@@ -1283,15 +1284,6 @@ static void annotate_call_site(struct objtool_file *file,
if (!sym)
sym = reloc->sym;
- /*
- * Alternative replacement code is just template code which is
- * sometimes copied to the original instruction. For now, don't
- * annotate it. (In the future we might consider annotating the
- * original instruction if/when it ever makes sense to do so.)
- */
- if (!strcmp(insn->sec->name, ".altinstr_replacement"))
- return;
-
if (sym->static_call_tramp) {
list_add_tail(&insn->call_node, &file->static_call_list);
return;
@@ -1349,7 +1341,8 @@ static void annotate_call_site(struct objtool_file *file,
return;
}
- if (insn->type == INSN_CALL && !insn->sec->init)
+ if (insn->type == INSN_CALL && !insn->sec->init &&
+ !insn->_call_dest->embedded_insn)
list_add_tail(&insn->call_node, &file->call_list);
if (!sibling && dead_end_function(file, sym))
@@ -1943,6 +1936,11 @@ out:
return ret;
}
+__weak unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table)
+{
+ return reloc->sym->offset + reloc_addend(reloc);
+}
+
static int add_jump_table(struct objtool_file *file, struct instruction *insn,
struct reloc *next_table)
{
@@ -1953,6 +1951,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
unsigned int prev_offset = 0;
struct reloc *reloc = table;
struct alternative *alt;
+ unsigned long sym_offset;
/*
* Each @reloc is a switch table relocation which points to the target
@@ -1967,15 +1966,24 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
break;
/* Make sure the table entries are consecutive: */
- if (prev_offset && reloc_offset(reloc) != prev_offset + 8)
+ if (prev_offset && reloc_offset(reloc) != prev_offset + arch_reloc_size(reloc))
break;
+ sym_offset = arch_jump_table_sym_offset(reloc, table);
+
/* Detect function pointers from contiguous objects: */
- if (reloc->sym->sec == pfunc->sec &&
- reloc_addend(reloc) == pfunc->offset)
+ if (reloc->sym->sec == pfunc->sec && sym_offset == pfunc->offset)
break;
- dest_insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
+ /*
+ * Clang sometimes leaves dangling unused jump table entries
+ * which point to the end of the function. Ignore them.
+ */
+ if (reloc->sym->sec == pfunc->sec &&
+ sym_offset == pfunc->offset + pfunc->len)
+ goto next;
+
+ dest_insn = find_insn(file, reloc->sym->sec, sym_offset);
if (!dest_insn)
break;
@@ -1992,6 +2000,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
alt->insn = dest_insn;
alt->next = insn->alts;
insn->alts = alt;
+next:
prev_offset = reloc_offset(reloc);
}
@@ -2013,6 +2022,7 @@ static void find_jump_table(struct objtool_file *file, struct symbol *func,
struct reloc *table_reloc;
struct instruction *dest_insn, *orig_insn = insn;
unsigned long table_size;
+ unsigned long sym_offset;
/*
* Backward search using the @first_jump_src links, these help avoid
@@ -2036,7 +2046,10 @@ static void find_jump_table(struct objtool_file *file, struct symbol *func,
table_reloc = arch_find_switch_table(file, insn, &table_size);
if (!table_reloc)
continue;
- dest_insn = find_insn(file, table_reloc->sym->sec, reloc_addend(table_reloc));
+
+ sym_offset = table_reloc->sym->offset + reloc_addend(table_reloc);
+
+ dest_insn = find_insn(file, table_reloc->sym->sec, sym_offset);
if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func)
continue;
@@ -2264,7 +2277,7 @@ static int read_annotate(struct objtool_file *file,
if (sec->sh.sh_entsize != 8) {
static bool warned = false;
- if (!warned) {
+ if (!warned && opts.verbose) {
WARN("%s: dodgy linker, sh_entsize != 8", sec->name);
warned = true;
}
@@ -2462,13 +2475,14 @@ static void mark_rodata(struct objtool_file *file)
*
* - .rodata: can contain GCC switch tables
* - .rodata.<func>: same, if -fdata-sections is being used
- * - .rodata..c_jump_table: contains C annotated jump tables
+ * - .data.rel.ro.c_jump_table: contains C annotated jump tables
*
* .rodata.str1.* sections are ignored; they don't contain jump tables.
*/
for_each_sec(file, sec) {
- if (!strncmp(sec->name, ".rodata", 7) &&
- !strstr(sec->name, ".str1.")) {
+ if ((!strncmp(sec->name, ".rodata", 7) &&
+ !strstr(sec->name, ".str1.")) ||
+ !strncmp(sec->name, ".data.rel.ro", 12)) {
sec->rodata = true;
found = true;
}
@@ -4438,35 +4452,6 @@ static int validate_sls(struct objtool_file *file)
return warnings;
}
-static bool ignore_noreturn_call(struct instruction *insn)
-{
- struct symbol *call_dest = insn_call_dest(insn);
-
- /*
- * FIXME: hack, we need a real noreturn solution
- *
- * Problem is, exc_double_fault() may or may not return, depending on
- * whether CONFIG_X86_ESPFIX64 is set. But objtool has no visibility
- * to the kernel config.
- *
- * Other potential ways to fix it:
- *
- * - have compiler communicate __noreturn functions somehow
- * - remove CONFIG_X86_ESPFIX64
- * - read the .config file
- * - add a cmdline option
- * - create a generic objtool annotation format (vs a bunch of custom
- * formats) and annotate it
- */
- if (!strcmp(call_dest->name, "exc_double_fault")) {
- /* prevent further unreachable warnings for the caller */
- insn->sym->warned = 1;
- return true;
- }
-
- return false;
-}
-
static int validate_reachable_instructions(struct objtool_file *file)
{
struct instruction *insn, *prev_insn;
@@ -4483,8 +4468,8 @@ static int validate_reachable_instructions(struct objtool_file *file)
prev_insn = prev_insn_same_sec(file, insn);
if (prev_insn && prev_insn->dead_end) {
call_dest = insn_call_dest(prev_insn);
- if (call_dest && !ignore_noreturn_call(prev_insn)) {
- WARN_INSN(insn, "%s() is missing a __noreturn annotation",
+ if (call_dest) {
+ WARN_INSN(insn, "%s() missing __noreturn in .c/.h or NORETURN() in noreturns.h",
call_dest->name);
warnings++;
continue;
@@ -4554,7 +4539,7 @@ static int disas_warned_funcs(struct objtool_file *file)
char *funcs = NULL, *tmp;
for_each_sym(file, sym) {
- if (sym->warned) {
+ if (sym->warnings) {
if (!funcs) {
funcs = malloc(strlen(sym->name) + 1);
strcpy(funcs, sym->name);
@@ -4612,8 +4597,10 @@ int check(struct objtool_file *file)
init_cfi_state(&force_undefined_cfi);
force_undefined_cfi.force_undefined = true;
- if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3)))
+ if (!cfi_hash_alloc(1UL << (file->elf->symbol_bits - 3))) {
+ ret = -1;
goto out;
+ }
cfi_hash_add(&init_cfi);
cfi_hash_add(&func_cfi);
@@ -4630,7 +4617,7 @@ int check(struct objtool_file *file)
if (opts.retpoline) {
ret = validate_retpoline(file);
if (ret < 0)
- return ret;
+ goto out;
warnings += ret;
}
@@ -4666,7 +4653,7 @@ int check(struct objtool_file *file)
*/
ret = validate_unrets(file);
if (ret < 0)
- return ret;
+ goto out;
warnings += ret;
}
@@ -4729,7 +4716,7 @@ int check(struct objtool_file *file)
if (opts.prefix) {
ret = add_prefix_symbols(file);
if (ret < 0)
- return ret;
+ goto out;
warnings += ret;
}
@@ -4761,9 +4748,18 @@ int check(struct objtool_file *file)
out:
/*
- * For now, don't fail the kernel build on fatal warnings. These
- * errors are still fairly common due to the growing matrix of
- * supported toolchains and their recent pace of change.
+ * CONFIG_OBJTOOL_WERROR upgrades all warnings (and errors) to actual
+ * errors.
+ *
+ * Note that even "fatal" type errors don't actually return an error
+ * without CONFIG_OBJTOOL_WERROR. That probably needs improved at some
+ * point.
*/
+ if (opts.werror && (ret || warnings)) {
+ if (warnings)
+ WARN("%d warning(s) upgraded to errors", warnings);
+ return 1;
+ }
+
return 0;
}
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 6f64d611faea..be4f4b62730c 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -1302,9 +1302,6 @@ int elf_write(struct elf *elf)
struct section *sec;
Elf_Scn *s;
- if (opts.dryrun)
- return 0;
-
/* Update changed relocation sections and section headers: */
list_for_each_entry(sec, &elf->sections, list) {
if (sec->truncate)
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index d63b46a19f39..089a1acc48a8 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -97,4 +97,7 @@ int arch_rewrite_retpolines(struct objtool_file *file);
bool arch_pc_relative_reloc(struct reloc *reloc);
+unsigned int arch_reloc_size(struct reloc *reloc);
+unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table);
+
#endif /* _ARCH_H */
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index fcca6662c8b4..0fafd0f7a209 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -29,15 +29,16 @@ struct opts {
/* options: */
bool backtrace;
- bool backup;
bool dryrun;
bool link;
bool mnop;
bool module;
bool no_unreachable;
+ const char *output;
bool sec_address;
bool stats;
bool verbose;
+ bool werror;
};
extern struct opts opts;
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index d7e815c2fd15..223ac1c24b90 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -65,10 +65,10 @@ struct symbol {
u8 return_thunk : 1;
u8 fentry : 1;
u8 profiling_func : 1;
- u8 warned : 1;
u8 embedded_insn : 1;
u8 local_label : 1;
u8 frame_pointer : 1;
+ u8 warnings : 2;
struct list_head pv_target;
struct reloc *relocs;
};
diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h
index e7ee7ffccefd..e049679bb17b 100644
--- a/tools/objtool/include/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -10,7 +10,7 @@
#include <objtool/check.h>
#include <objtool/elf.h>
-#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
+#define C_JUMP_TABLE_SECTION ".data.rel.ro.c_jump_table"
struct special_alt {
struct list_head list;
diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h
index ac04d3fe4dd9..e72b9d630551 100644
--- a/tools/objtool/include/objtool/warn.h
+++ b/tools/objtool/include/objtool/warn.h
@@ -43,8 +43,10 @@ static inline char *offstr(struct section *sec, unsigned long offset)
#define WARN(format, ...) \
fprintf(stderr, \
- "%s: warning: objtool: " format "\n", \
- objname, ##__VA_ARGS__)
+ "%s: %s: objtool: " format "\n", \
+ objname, \
+ opts.werror ? "error" : "warning", \
+ ##__VA_ARGS__)
#define WARN_FUNC(format, sec, offset, ...) \
({ \
@@ -53,14 +55,22 @@ static inline char *offstr(struct section *sec, unsigned long offset)
free(_str); \
})
+#define WARN_LIMIT 2
+
#define WARN_INSN(insn, format, ...) \
({ \
struct instruction *_insn = (insn); \
- if (!_insn->sym || !_insn->sym->warned) \
+ BUILD_BUG_ON(WARN_LIMIT > 2); \
+ if (!_insn->sym || _insn->sym->warnings < WARN_LIMIT) { \
WARN_FUNC(format, _insn->sec, _insn->offset, \
##__VA_ARGS__); \
- if (_insn->sym) \
- _insn->sym->warned = 1; \
+ if (_insn->sym) \
+ _insn->sym->warnings++; \
+ } else if (_insn->sym && _insn->sym->warnings == WARN_LIMIT) { \
+ WARN_FUNC("skipping duplicate warning(s)", \
+ _insn->sec, _insn->offset); \
+ _insn->sym->warnings++; \
+ } \
})
#define BT_INSN(insn, format, ...) \
diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
index b2174894f9f7..eacfe3b0a8d1 100644
--- a/tools/objtool/noreturns.h
+++ b/tools/objtool/noreturns.h
@@ -16,10 +16,11 @@ NORETURN(__tdx_hypercall_failed)
NORETURN(__ubsan_handle_builtin_unreachable)
NORETURN(__x64_sys_exit)
NORETURN(__x64_sys_exit_group)
+NORETURN(acpi_processor_ffh_play_dead)
NORETURN(arch_cpu_idle_dead)
NORETURN(bch2_trans_in_restart_error)
NORETURN(bch2_trans_restart_error)
-NORETURN(bch2_trans_unlocked_error)
+NORETURN(bch2_trans_unlocked_or_in_restart_error)
NORETURN(cpu_bringup_and_idle)
NORETURN(cpu_startup_entry)
NORETURN(do_exit)
@@ -34,6 +35,7 @@ NORETURN(kunit_try_catch_throw)
NORETURN(machine_real_restart)
NORETURN(make_task_dead)
NORETURN(mpt_halt_firmware)
+NORETURN(mwait_play_dead)
NORETURN(nmi_panic_self_stop)
NORETURN(panic)
NORETURN(panic_smp_self_stop)
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index f40febdd6e36..1c73fb62fd57 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -18,87 +18,19 @@
bool help;
-const char *objname;
static struct objtool_file file;
-static bool objtool_create_backup(const char *_objname)
+struct objtool_file *objtool_open_read(const char *filename)
{
- int len = strlen(_objname);
- char *buf, *base, *name = malloc(len+6);
- int s, d, l, t;
-
- if (!name) {
- perror("failed backup name malloc");
- return false;
- }
-
- strcpy(name, _objname);
- strcpy(name + len, ".orig");
-
- d = open(name, O_CREAT|O_WRONLY|O_TRUNC, 0644);
- if (d < 0) {
- perror("failed to create backup file");
- return false;
- }
-
- s = open(_objname, O_RDONLY);
- if (s < 0) {
- perror("failed to open orig file");
- return false;
- }
-
- buf = malloc(4096);
- if (!buf) {
- perror("failed backup data malloc");
- return false;
- }
-
- while ((l = read(s, buf, 4096)) > 0) {
- base = buf;
- do {
- t = write(d, base, l);
- if (t < 0) {
- perror("failed backup write");
- return false;
- }
- base += t;
- l -= t;
- } while (l);
- }
-
- if (l < 0) {
- perror("failed backup read");
- return false;
- }
-
- free(name);
- free(buf);
- close(d);
- close(s);
-
- return true;
-}
-
-struct objtool_file *objtool_open_read(const char *_objname)
-{
- if (objname) {
- if (strcmp(objname, _objname)) {
- WARN("won't handle more than one file at a time");
- return NULL;
- }
- return &file;
+ if (file.elf) {
+ WARN("won't handle more than one file at a time");
+ return NULL;
}
- objname = _objname;
- file.elf = elf_open_read(objname, O_RDWR);
+ file.elf = elf_open_read(filename, O_RDWR);
if (!file.elf)
return NULL;
- if (opts.backup && !objtool_create_backup(objname)) {
- WARN("can't create backup file");
- return NULL;
- }
-
hash_init(file.insn_hash);
INIT_LIST_HEAD(&file.retpoline_call_list);
INIT_LIST_HEAD(&file.return_thunk_list);
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index a62247efb64f..05ef0e297837 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -10,7 +10,7 @@
#include <objtool/warn.h>
#include <objtool/endianness.h>
-int orc_dump(const char *_objname)
+int orc_dump(const char *filename)
{
int fd, nr_entries, i, *orc_ip = NULL, orc_size = 0;
struct orc_entry *orc = NULL;
@@ -26,12 +26,9 @@ int orc_dump(const char *_objname)
Elf_Data *data, *symtab = NULL, *rela_orc_ip = NULL;
struct elf dummy_elf = {};
-
- objname = _objname;
-
elf_version(EV_CURRENT);
- fd = open(objname, O_RDONLY);
+ fd = open(filename, O_RDONLY);
if (fd == -1) {
perror("open");
return -1;
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 55d6ce9ea52f..05c083bb1122 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -161,47 +161,6 @@ export VPATH
SOURCE := $(shell ln -sf $(srctree)/tools/perf $(OUTPUT)/source)
endif
-# Beautify output
-# ---------------------------------------------------------------------------
-#
-# Most of build commands in Kbuild start with "cmd_". You can optionally define
-# "quiet_cmd_*". If defined, the short log is printed. Otherwise, no log from
-# that command is printed by default.
-#
-# e.g.)
-# quiet_cmd_depmod = DEPMOD $(MODLIB)
-# cmd_depmod = $(srctree)/scripts/depmod.sh $(DEPMOD) $(KERNELRELEASE)
-#
-# A simple variant is to prefix commands with $(Q) - that's useful
-# for commands that shall be hidden in non-verbose mode.
-#
-# $(Q)$(MAKE) $(build)=scripts/basic
-#
-# To put more focus on warnings, be less verbose as default
-# Use 'make V=1' to see the full commands
-
-ifeq ($(V),1)
- quiet =
- Q =
-else
- quiet=quiet_
- Q=@
-endif
-
-# If the user is running make -s (silent mode), suppress echoing of commands
-# make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS.
-ifeq ($(filter 3.%,$(MAKE_VERSION)),)
-short-opts := $(firstword -$(MAKEFLAGS))
-else
-short-opts := $(filter-out --%,$(MAKEFLAGS))
-endif
-
-ifneq ($(findstring s,$(short-opts)),)
- quiet=silent_
-endif
-
-export quiet Q
-
# Do not use make's built-in rules
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index d3c6e10dce73..a4499e5a6f9c 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -26,8 +26,6 @@ FILES=(
"include/linux/hash.h"
"include/linux/list-sort.h"
"include/uapi/linux/hw_breakpoint.h"
- "arch/x86/include/asm/disabled-features.h"
- "arch/x86/include/asm/required-features.h"
"arch/x86/include/asm/cpufeatures.h"
"arch/x86/include/asm/inat_types.h"
"arch/x86/include/asm/emulate_prefix.h"
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 8d5011a0bf60..26057af6b5a1 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -1056,7 +1056,7 @@ static const struct platform_data turbostat_pdata[] = {
* Missing support for
* INTEL_ICELAKE
* INTEL_ATOM_SILVERMONT_MID
- * INTEL_ATOM_AIRMONT_MID
+ * INTEL_ATOM_SILVERMONT_MID2
* INTEL_ATOM_AIRMONT_NP
*/
{ 0, NULL },
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index f3e15e9efa76..dc4333d23189 100644
--- a/tools/sched_ext/include/scx/common.bpf.h
+++ b/tools/sched_ext/include/scx/common.bpf.h
@@ -7,6 +7,13 @@
#ifndef __SCX_COMMON_BPF_H
#define __SCX_COMMON_BPF_H
+/*
+ * The generated kfunc prototypes in vmlinux.h are missing address space
+ * attributes which cause build failures. For now, suppress the generated
+ * prototypes. See https://github.com/sched-ext/scx/issues/1111.
+ */
+#define BPF_NO_KFUNC_PROTOTYPES
+
#ifdef LSP
#define __bpf__
#include "../vmlinux.h"
@@ -18,6 +25,7 @@
#include <bpf/bpf_tracing.h>
#include <asm-generic/errno.h>
#include "user_exit_info.h"
+#include "enum_defs.autogen.h"
#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
@@ -62,21 +70,28 @@ void scx_bpf_dump_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym
u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym __weak;
u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym __weak;
void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym __weak;
+u32 scx_bpf_nr_node_ids(void) __ksym __weak;
u32 scx_bpf_nr_cpu_ids(void) __ksym __weak;
+int scx_bpf_cpu_node(s32 cpu) __ksym __weak;
const struct cpumask *scx_bpf_get_possible_cpumask(void) __ksym __weak;
const struct cpumask *scx_bpf_get_online_cpumask(void) __ksym __weak;
void scx_bpf_put_cpumask(const struct cpumask *cpumask) __ksym __weak;
+const struct cpumask *scx_bpf_get_idle_cpumask_node(int node) __ksym __weak;
const struct cpumask *scx_bpf_get_idle_cpumask(void) __ksym;
+const struct cpumask *scx_bpf_get_idle_smtmask_node(int node) __ksym __weak;
const struct cpumask *scx_bpf_get_idle_smtmask(void) __ksym;
void scx_bpf_put_idle_cpumask(const struct cpumask *cpumask) __ksym;
bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) __ksym;
+s32 scx_bpf_pick_idle_cpu_node(const cpumask_t *cpus_allowed, int node, u64 flags) __ksym __weak;
s32 scx_bpf_pick_idle_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
+s32 scx_bpf_pick_any_cpu_node(const cpumask_t *cpus_allowed, int node, u64 flags) __ksym __weak;
s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
bool scx_bpf_task_running(const struct task_struct *p) __ksym;
s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;
u64 scx_bpf_now(void) __ksym __weak;
+void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __weak;
/*
* Use the following as @it__iter when calling scx_bpf_dsq_move[_vtime]() from
@@ -84,6 +99,9 @@ u64 scx_bpf_now(void) __ksym __weak;
*/
#define BPF_FOR_EACH_ITER (&___it)
+#define scx_read_event(e, name) \
+ (bpf_core_field_exists((e)->name) ? (e)->name : 0)
+
static inline __attribute__((format(printf, 1, 2)))
void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
@@ -270,8 +288,16 @@ void bpf_obj_drop_impl(void *kptr, void *meta) __ksym;
#define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL))
#define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL)
-void bpf_list_push_front(struct bpf_list_head *head, struct bpf_list_node *node) __ksym;
-void bpf_list_push_back(struct bpf_list_head *head, struct bpf_list_node *node) __ksym;
+int bpf_list_push_front_impl(struct bpf_list_head *head,
+ struct bpf_list_node *node,
+ void *meta, __u64 off) __ksym;
+#define bpf_list_push_front(head, node) bpf_list_push_front_impl(head, node, NULL, 0)
+
+int bpf_list_push_back_impl(struct bpf_list_head *head,
+ struct bpf_list_node *node,
+ void *meta, __u64 off) __ksym;
+#define bpf_list_push_back(head, node) bpf_list_push_back_impl(head, node, NULL, 0)
+
struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym;
struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym;
struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
@@ -404,6 +430,17 @@ static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask)
return (const struct cpumask *)mask;
}
+/*
+ * Return true if task @p cannot migrate to a different CPU, false
+ * otherwise.
+ */
+static inline bool is_migration_disabled(const struct task_struct *p)
+{
+ if (bpf_core_field_exists(p->migration_disabled))
+ return p->migration_disabled;
+ return false;
+}
+
/* rcu */
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
@@ -421,7 +458,7 @@ void bpf_rcu_read_unlock(void) __ksym;
*/
static inline s64 time_delta(u64 after, u64 before)
{
- return (s64)(after - before) > 0 ? : 0;
+ return (s64)(after - before) > 0 ? (s64)(after - before) : 0;
}
/**
@@ -565,6 +602,22 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
__u.__val; \
})
+#define READ_ONCE_ARENA(type, x) \
+({ \
+ union { type __val; char __c[1]; } __u = \
+ { .__c = { 0 } }; \
+ __read_once_size((void *)&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
+#define WRITE_ONCE_ARENA(type, x, val) \
+({ \
+ union { type __val; char __c[1]; } __u = \
+ { .__val = (val) }; \
+ __write_once_size((void *)&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
/*
* log2_u32 - Compute the base 2 logarithm of a 32-bit exponential value.
* @v: The value for which we're computing the base 2 logarithm.
diff --git a/tools/sched_ext/include/scx/common.h b/tools/sched_ext/include/scx/common.h
index dc18b99e55cd..1dc76bd84296 100644
--- a/tools/sched_ext/include/scx/common.h
+++ b/tools/sched_ext/include/scx/common.h
@@ -16,6 +16,7 @@
#include <stdlib.h>
#include <stdint.h>
#include <errno.h>
+#include "enum_defs.autogen.h"
typedef uint8_t u8;
typedef uint16_t u16;
diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h
index 50e1499ae093..9252e1a00556 100644
--- a/tools/sched_ext/include/scx/compat.bpf.h
+++ b/tools/sched_ext/include/scx/compat.bpf.h
@@ -125,12 +125,107 @@ bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter,
false; \
})
+/**
+ * __COMPAT_is_enq_cpu_selected - Test if SCX_ENQ_CPU_SELECTED is on
+ * in a compatible way. We will preserve this __COMPAT helper until v6.16.
+ *
+ * @enq_flags: enqueue flags from ops.enqueue()
+ *
+ * Return: True if SCX_ENQ_CPU_SELECTED is turned on in @enq_flags
+ */
+static inline bool __COMPAT_is_enq_cpu_selected(u64 enq_flags)
+{
+#ifdef HAVE_SCX_ENQ_CPU_SELECTED
+ /*
+ * This is the case that a BPF code compiled against vmlinux.h
+ * where the enum SCX_ENQ_CPU_SELECTED exists.
+ */
+
+ /*
+ * We should temporarily suspend the macro expansion of
+ * 'SCX_ENQ_CPU_SELECTED'. This avoids 'SCX_ENQ_CPU_SELECTED' being
+ * rewritten to '__SCX_ENQ_CPU_SELECTED' when 'SCX_ENQ_CPU_SELECTED'
+ * is defined in 'scripts/gen_enums.py'.
+ */
+#pragma push_macro("SCX_ENQ_CPU_SELECTED")
+#undef SCX_ENQ_CPU_SELECTED
+ u64 flag;
+
+ /*
+ * When the kernel did not have SCX_ENQ_CPU_SELECTED,
+ * select_task_rq_scx() has never been skipped. Thus, this case
+ * should be considered that the CPU has already been selected.
+ */
+ if (!bpf_core_enum_value_exists(enum scx_enq_flags,
+ SCX_ENQ_CPU_SELECTED))
+ return true;
+
+ flag = bpf_core_enum_value(enum scx_enq_flags, SCX_ENQ_CPU_SELECTED);
+ return enq_flags & flag;
+
+ /*
+ * Once done, resume the macro expansion of 'SCX_ENQ_CPU_SELECTED'.
+ */
+#pragma pop_macro("SCX_ENQ_CPU_SELECTED")
+#else
+ /*
+ * This is the case that a BPF code compiled against vmlinux.h
+ * where the enum SCX_ENQ_CPU_SELECTED does NOT exist.
+ */
+ return true;
+#endif /* HAVE_SCX_ENQ_CPU_SELECTED */
+}
+
+
#define scx_bpf_now() \
(bpf_ksym_exists(scx_bpf_now) ? \
scx_bpf_now() : \
bpf_ktime_get_ns())
/*
+ * v6.15: Introduce event counters.
+ *
+ * Preserve the following macro until v6.17.
+ */
+#define __COMPAT_scx_bpf_events(events, size) \
+ (bpf_ksym_exists(scx_bpf_events) ? \
+ scx_bpf_events(events, size) : ({}))
+
+/*
+ * v6.15: Introduce NUMA-aware kfuncs to operate with per-node idle
+ * cpumasks.
+ *
+ * Preserve the following __COMPAT_scx_*_node macros until v6.17.
+ */
+#define __COMPAT_scx_bpf_nr_node_ids() \
+ (bpf_ksym_exists(scx_bpf_nr_node_ids) ? \
+ scx_bpf_nr_node_ids() : 1U)
+
+#define __COMPAT_scx_bpf_cpu_node(cpu) \
+ (bpf_ksym_exists(scx_bpf_cpu_node) ? \
+ scx_bpf_cpu_node(cpu) : 0)
+
+#define __COMPAT_scx_bpf_get_idle_cpumask_node(node) \
+ (bpf_ksym_exists(scx_bpf_get_idle_cpumask_node) ? \
+ scx_bpf_get_idle_cpumask_node(node) : \
+ scx_bpf_get_idle_cpumask()) \
+
+#define __COMPAT_scx_bpf_get_idle_smtmask_node(node) \
+ (bpf_ksym_exists(scx_bpf_get_idle_smtmask_node) ? \
+ scx_bpf_get_idle_smtmask_node(node) : \
+ scx_bpf_get_idle_smtmask())
+
+#define __COMPAT_scx_bpf_pick_idle_cpu_node(cpus_allowed, node, flags) \
+ (bpf_ksym_exists(scx_bpf_pick_idle_cpu_node) ? \
+ scx_bpf_pick_idle_cpu_node(cpus_allowed, node, flags) : \
+ scx_bpf_pick_idle_cpu(cpus_allowed, flags))
+
+#define __COMPAT_scx_bpf_pick_any_cpu_node(cpus_allowed, node, flags) \
+ (bpf_ksym_exists(scx_bpf_pick_any_cpu_node) ? \
+ scx_bpf_pick_any_cpu_node(cpus_allowed, node, flags) : \
+ scx_bpf_pick_any_cpu(cpus_allowed, flags))
+
+/*
* Define sched_ext_ops. This may be expanded to define multiple variants for
* backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
*/
diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h
index b50280e2ba2b..35c67c5174ac 100644
--- a/tools/sched_ext/include/scx/compat.h
+++ b/tools/sched_ext/include/scx/compat.h
@@ -106,8 +106,20 @@ static inline bool __COMPAT_struct_has_field(const char *type, const char *field
return false;
}
-#define SCX_OPS_SWITCH_PARTIAL \
- __COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL")
+#define SCX_OPS_FLAG(name) __COMPAT_ENUM_OR_ZERO("scx_ops_flags", #name)
+
+#define SCX_OPS_KEEP_BUILTIN_IDLE SCX_OPS_FLAG(SCX_OPS_KEEP_BUILTIN_IDLE)
+#define SCX_OPS_ENQ_LAST SCX_OPS_FLAG(SCX_OPS_ENQ_LAST)
+#define SCX_OPS_ENQ_EXITING SCX_OPS_FLAG(SCX_OPS_ENQ_EXITING)
+#define SCX_OPS_SWITCH_PARTIAL SCX_OPS_FLAG(SCX_OPS_SWITCH_PARTIAL)
+#define SCX_OPS_ENQ_MIGRATION_DISABLED SCX_OPS_FLAG(SCX_OPS_ENQ_MIGRATION_DISABLED)
+#define SCX_OPS_ALLOW_QUEUED_WAKEUP SCX_OPS_FLAG(SCX_OPS_ALLOW_QUEUED_WAKEUP)
+#define SCX_OPS_BUILTIN_IDLE_PER_NODE SCX_OPS_FLAG(SCX_OPS_BUILTIN_IDLE_PER_NODE)
+
+#define SCX_PICK_IDLE_FLAG(name) __COMPAT_ENUM_OR_ZERO("scx_pick_idle_cpu_flags", #name)
+
+#define SCX_PICK_IDLE_CORE SCX_PICK_IDLE_FLAG(SCX_PICK_IDLE_CORE)
+#define SCX_PICK_IDLE_IN_NODE SCX_PICK_IDLE_FLAG(SCX_PICK_IDLE_IN_NODE)
static inline long scx_hotplug_seq(void)
{
diff --git a/tools/sched_ext/include/scx/enum_defs.autogen.h b/tools/sched_ext/include/scx/enum_defs.autogen.h
new file mode 100644
index 000000000000..6e6c45f14fe1
--- /dev/null
+++ b/tools/sched_ext/include/scx/enum_defs.autogen.h
@@ -0,0 +1,120 @@
+/*
+ * WARNING: This file is autogenerated from gen_enum_defs.py [1].
+ *
+ * [1] https://github.com/sched-ext/scx/blob/main/scripts/gen_enum_defs.py
+ */
+
+#ifndef __ENUM_DEFS_AUTOGEN_H__
+#define __ENUM_DEFS_AUTOGEN_H__
+
+#define HAVE_SCX_DSP_DFL_MAX_BATCH
+#define HAVE_SCX_DSP_MAX_LOOPS
+#define HAVE_SCX_WATCHDOG_MAX_TIMEOUT
+#define HAVE_SCX_EXIT_BT_LEN
+#define HAVE_SCX_EXIT_MSG_LEN
+#define HAVE_SCX_EXIT_DUMP_DFL_LEN
+#define HAVE_SCX_CPUPERF_ONE
+#define HAVE_SCX_OPS_TASK_ITER_BATCH
+#define HAVE_SCX_CPU_PREEMPT_RT
+#define HAVE_SCX_CPU_PREEMPT_DL
+#define HAVE_SCX_CPU_PREEMPT_STOP
+#define HAVE_SCX_CPU_PREEMPT_UNKNOWN
+#define HAVE_SCX_DEQ_SLEEP
+#define HAVE_SCX_DEQ_CORE_SCHED_EXEC
+#define HAVE_SCX_DSQ_FLAG_BUILTIN
+#define HAVE_SCX_DSQ_FLAG_LOCAL_ON
+#define HAVE_SCX_DSQ_INVALID
+#define HAVE_SCX_DSQ_GLOBAL
+#define HAVE_SCX_DSQ_LOCAL
+#define HAVE_SCX_DSQ_LOCAL_ON
+#define HAVE_SCX_DSQ_LOCAL_CPU_MASK
+#define HAVE_SCX_DSQ_ITER_REV
+#define HAVE___SCX_DSQ_ITER_HAS_SLICE
+#define HAVE___SCX_DSQ_ITER_HAS_VTIME
+#define HAVE___SCX_DSQ_ITER_USER_FLAGS
+#define HAVE___SCX_DSQ_ITER_ALL_FLAGS
+#define HAVE_SCX_DSQ_LNODE_ITER_CURSOR
+#define HAVE___SCX_DSQ_LNODE_PRIV_SHIFT
+#define HAVE_SCX_ENQ_WAKEUP
+#define HAVE_SCX_ENQ_HEAD
+#define HAVE_SCX_ENQ_CPU_SELECTED
+#define HAVE_SCX_ENQ_PREEMPT
+#define HAVE_SCX_ENQ_REENQ
+#define HAVE_SCX_ENQ_LAST
+#define HAVE___SCX_ENQ_INTERNAL_MASK
+#define HAVE_SCX_ENQ_CLEAR_OPSS
+#define HAVE_SCX_ENQ_DSQ_PRIQ
+#define HAVE_SCX_TASK_DSQ_ON_PRIQ
+#define HAVE_SCX_TASK_QUEUED
+#define HAVE_SCX_TASK_RESET_RUNNABLE_AT
+#define HAVE_SCX_TASK_DEQD_FOR_SLEEP
+#define HAVE_SCX_TASK_STATE_SHIFT
+#define HAVE_SCX_TASK_STATE_BITS
+#define HAVE_SCX_TASK_STATE_MASK
+#define HAVE_SCX_TASK_CURSOR
+#define HAVE_SCX_ECODE_RSN_HOTPLUG
+#define HAVE_SCX_ECODE_ACT_RESTART
+#define HAVE_SCX_EXIT_NONE
+#define HAVE_SCX_EXIT_DONE
+#define HAVE_SCX_EXIT_UNREG
+#define HAVE_SCX_EXIT_UNREG_BPF
+#define HAVE_SCX_EXIT_UNREG_KERN
+#define HAVE_SCX_EXIT_SYSRQ
+#define HAVE_SCX_EXIT_ERROR
+#define HAVE_SCX_EXIT_ERROR_BPF
+#define HAVE_SCX_EXIT_ERROR_STALL
+#define HAVE_SCX_KF_UNLOCKED
+#define HAVE_SCX_KF_CPU_RELEASE
+#define HAVE_SCX_KF_DISPATCH
+#define HAVE_SCX_KF_ENQUEUE
+#define HAVE_SCX_KF_SELECT_CPU
+#define HAVE_SCX_KF_REST
+#define HAVE___SCX_KF_RQ_LOCKED
+#define HAVE___SCX_KF_TERMINAL
+#define HAVE_SCX_KICK_IDLE
+#define HAVE_SCX_KICK_PREEMPT
+#define HAVE_SCX_KICK_WAIT
+#define HAVE_SCX_OPI_BEGIN
+#define HAVE_SCX_OPI_NORMAL_BEGIN
+#define HAVE_SCX_OPI_NORMAL_END
+#define HAVE_SCX_OPI_CPU_HOTPLUG_BEGIN
+#define HAVE_SCX_OPI_CPU_HOTPLUG_END
+#define HAVE_SCX_OPI_END
+#define HAVE_SCX_OPS_ENABLING
+#define HAVE_SCX_OPS_ENABLED
+#define HAVE_SCX_OPS_DISABLING
+#define HAVE_SCX_OPS_DISABLED
+#define HAVE_SCX_OPS_KEEP_BUILTIN_IDLE
+#define HAVE_SCX_OPS_ENQ_LAST
+#define HAVE_SCX_OPS_ENQ_EXITING
+#define HAVE_SCX_OPS_SWITCH_PARTIAL
+#define HAVE_SCX_OPS_HAS_CGROUP_WEIGHT
+#define HAVE_SCX_OPS_ALL_FLAGS
+#define HAVE_SCX_OPSS_NONE
+#define HAVE_SCX_OPSS_QUEUEING
+#define HAVE_SCX_OPSS_QUEUED
+#define HAVE_SCX_OPSS_DISPATCHING
+#define HAVE_SCX_OPSS_QSEQ_SHIFT
+#define HAVE_SCX_PICK_IDLE_CORE
+#define HAVE_SCX_OPS_NAME_LEN
+#define HAVE_SCX_SLICE_DFL
+#define HAVE_SCX_SLICE_INF
+#define HAVE_SCX_RQ_ONLINE
+#define HAVE_SCX_RQ_CAN_STOP_TICK
+#define HAVE_SCX_RQ_BAL_PENDING
+#define HAVE_SCX_RQ_BAL_KEEP
+#define HAVE_SCX_RQ_BYPASSING
+#define HAVE_SCX_RQ_IN_WAKEUP
+#define HAVE_SCX_RQ_IN_BALANCE
+#define HAVE_SCX_TASK_NONE
+#define HAVE_SCX_TASK_INIT
+#define HAVE_SCX_TASK_READY
+#define HAVE_SCX_TASK_ENABLED
+#define HAVE_SCX_TASK_NR_STATES
+#define HAVE_SCX_TG_ONLINE
+#define HAVE_SCX_TG_INITED
+#define HAVE_SCX_WAKE_FORK
+#define HAVE_SCX_WAKE_TTWU
+#define HAVE_SCX_WAKE_SYNC
+
+#endif /* __ENUM_DEFS_AUTOGEN_H__ */
diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c
index 1e9f74525d8f..6ba6e610eeaa 100644
--- a/tools/sched_ext/scx_central.c
+++ b/tools/sched_ext/scx_central.c
@@ -10,6 +10,7 @@
#include <unistd.h>
#include <inttypes.h>
#include <signal.h>
+#include <assert.h>
#include <libgen.h>
#include <bpf/bpf.h>
#include <scx/common.h>
@@ -60,14 +61,22 @@ restart:
skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
+ assert(skel->rodata->nr_cpu_ids <= INT32_MAX);
+
while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
switch (opt) {
case 's':
skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
break;
- case 'c':
- skel->rodata->central_cpu = strtoul(optarg, NULL, 0);
+ case 'c': {
+ u32 central_cpu = strtoul(optarg, NULL, 0);
+ if (central_cpu >= skel->rodata->nr_cpu_ids) {
+ fprintf(stderr, "invalid central CPU id value, %u given (%u max)\n", central_cpu, skel->rodata->nr_cpu_ids);
+ return -1;
+ }
+ skel->rodata->central_cpu = (s32)central_cpu;
break;
+ }
case 'v':
verbose = true;
break;
@@ -96,7 +105,7 @@ restart:
*/
cpuset = CPU_ALLOC(skel->rodata->nr_cpu_ids);
SCX_BUG_ON(!cpuset, "Failed to allocate cpuset");
- CPU_ZERO(cpuset);
+ CPU_ZERO_S(CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids), cpuset);
CPU_SET(skel->rodata->central_cpu, cpuset);
SCX_BUG_ON(sched_setaffinity(0, sizeof(*cpuset), cpuset),
"Failed to affinitize to central CPU %d (max %d)",
diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
index 3a20bb0c014a..26c40ca4f36c 100644
--- a/tools/sched_ext/scx_qmap.bpf.c
+++ b/tools/sched_ext/scx_qmap.bpf.c
@@ -231,7 +231,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
}
/* if select_cpu() wasn't called, try direct dispatch */
- if (!(enq_flags & SCX_ENQ_CPU_SELECTED) &&
+ if (!__COMPAT_is_enq_cpu_selected(enq_flags) &&
(cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) {
__sync_fetch_and_add(&nr_ddsp_from_enq, 1);
scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags);
@@ -763,6 +763,8 @@ static void dump_shared_dsq(void)
static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
{
+ struct scx_event_stats events;
+
bpf_rcu_read_lock();
dispatch_highpri(true);
bpf_rcu_read_unlock();
@@ -772,6 +774,25 @@ static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
if (print_shared_dsq)
dump_shared_dsq();
+ __COMPAT_scx_bpf_events(&events, sizeof(events));
+
+ bpf_printk("%35s: %lld", "SCX_EV_SELECT_CPU_FALLBACK",
+ scx_read_event(&events, SCX_EV_SELECT_CPU_FALLBACK));
+ bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE",
+ scx_read_event(&events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE));
+ bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_KEEP_LAST",
+ scx_read_event(&events, SCX_EV_DISPATCH_KEEP_LAST));
+ bpf_printk("%35s: %lld", "SCX_EV_ENQ_SKIP_EXITING",
+ scx_read_event(&events, SCX_EV_ENQ_SKIP_EXITING));
+ bpf_printk("%35s: %lld", "SCX_EV_ENQ_SLICE_DFL",
+ scx_read_event(&events, SCX_EV_ENQ_SLICE_DFL));
+ bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DURATION",
+ scx_read_event(&events, SCX_EV_BYPASS_DURATION));
+ bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DISPATCH",
+ scx_read_event(&events, SCX_EV_BYPASS_DISPATCH));
+ bpf_printk("%35s: %lld", "SCX_EV_BYPASS_ACTIVATE",
+ scx_read_event(&events, SCX_EV_BYPASS_ACTIVATE));
+
bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
return 0;
}
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 0aa4005017c7..45f4abef7064 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -136,6 +136,33 @@ else
NO_SUBDIR = :
endif
+# Beautify output
+# ---------------------------------------------------------------------------
+#
+# Most of build commands in Kbuild start with "cmd_". You can optionally define
+# "quiet_cmd_*". If defined, the short log is printed. Otherwise, no log from
+# that command is printed by default.
+#
+# e.g.)
+# quiet_cmd_depmod = DEPMOD $(MODLIB)
+# cmd_depmod = $(srctree)/scripts/depmod.sh $(DEPMOD) $(KERNELRELEASE)
+#
+# A simple variant is to prefix commands with $(Q) - that's useful
+# for commands that shall be hidden in non-verbose mode.
+#
+# $(Q)$(MAKE) $(build)=scripts/basic
+#
+# To put more focus on warnings, be less verbose as default
+# Use 'make V=1' to see the full commands
+
+ifeq ($(V),1)
+ quiet =
+ Q =
+else
+ quiet = quiet_
+ Q = @
+endif
+
# If the user is running make -s (silent mode), suppress echoing of commands
# make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS.
ifeq ($(filter 3.%,$(MAKE_VERSION)),)
@@ -146,8 +173,11 @@ endif
ifneq ($(findstring s,$(short-opts)),)
silent=1
+ quiet=silent_
endif
+export quiet Q
+
#
# Define a callable command for descending to a new directory
#
diff --git a/tools/sound/dapm-graph b/tools/sound/dapm-graph
index f14bdfedee8f..b6196ee5065a 100755
--- a/tools/sound/dapm-graph
+++ b/tools/sound/dapm-graph
@@ -10,7 +10,7 @@ set -eu
STYLE_COMPONENT_ON="color=dodgerblue;style=bold"
STYLE_COMPONENT_OFF="color=gray40;style=filled;fillcolor=gray90"
-STYLE_NODE_ON="shape=box,style=bold,color=green4"
+STYLE_NODE_ON="shape=box,style=bold,color=green4,fillcolor=white"
STYLE_NODE_OFF="shape=box,style=filled,color=gray30,fillcolor=gray95"
# Print usage and exit
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 8daac70c2f9d..2ebaf5e6942e 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -35,6 +35,7 @@ TARGETS += filesystems/epoll
TARGETS += filesystems/fat
TARGETS += filesystems/overlayfs
TARGETS += filesystems/statmount
+TARGETS += filesystems/mount-notify
TARGETS += firmware
TARGETS += fpu
TARGETS += ftrace
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
index eb6a4fea8c79..f7f9e7088bb3 100644
--- a/tools/testing/selftests/bpf/Makefile.docs
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -7,12 +7,6 @@ INSTALL ?= install
RM ?= rm -f
RMDIR ?= rmdir --ignore-fail-on-non-empty
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
prefix ?= /usr/local
mandir ?= $(prefix)/man
man2dir = $(mandir)/man2
diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
index 66191ae9863c..79c3ccadb962 100644
--- a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
@@ -120,11 +120,12 @@ static void validate_fetch_results(int outer_map_fd,
static void fetch_and_validate(int outer_map_fd,
struct bpf_map_batch_opts *opts,
- __u32 batch_size, bool delete_entries)
+ __u32 batch_size, bool delete_entries,
+ bool has_holes)
{
- __u32 *fetched_keys, *fetched_values, total_fetched = 0;
- __u32 batch_key = 0, fetch_count, step_size;
- int err, max_entries = OUTER_MAP_ENTRIES;
+ int err, max_entries = OUTER_MAP_ENTRIES - !!has_holes;
+ __u32 *fetched_keys, *fetched_values, total_fetched = 0, i;
+ __u32 batch_key = 0, fetch_count, step_size = batch_size;
__u32 value_size = sizeof(__u32);
/* Total entries needs to be fetched */
@@ -134,9 +135,8 @@ static void fetch_and_validate(int outer_map_fd,
"Memory allocation failed for fetched_keys or fetched_values",
"error=%s\n", strerror(errno));
- for (step_size = batch_size;
- step_size <= max_entries;
- step_size += batch_size) {
+ /* hash map may not always return full batch */
+ for (i = 0; i < OUTER_MAP_ENTRIES; i++) {
fetch_count = step_size;
err = delete_entries
? bpf_map_lookup_and_delete_batch(outer_map_fd,
@@ -155,6 +155,7 @@ static void fetch_and_validate(int outer_map_fd,
if (err && errno == ENOSPC) {
/* Fetch again with higher batch size */
total_fetched = 0;
+ step_size += batch_size;
continue;
}
@@ -184,18 +185,19 @@ static void fetch_and_validate(int outer_map_fd,
}
static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
- enum bpf_map_type inner_map_type)
+ enum bpf_map_type inner_map_type,
+ bool has_holes)
{
+ __u32 max_entries = OUTER_MAP_ENTRIES - !!has_holes;
__u32 *outer_map_keys, *inner_map_fds;
- __u32 max_entries = OUTER_MAP_ENTRIES;
LIBBPF_OPTS(bpf_map_batch_opts, opts);
__u32 value_size = sizeof(__u32);
int batch_size[2] = {5, 10};
__u32 map_index, op_index;
int outer_map_fd, ret;
- outer_map_keys = calloc(max_entries, value_size);
- inner_map_fds = calloc(max_entries, value_size);
+ outer_map_keys = calloc(OUTER_MAP_ENTRIES, value_size);
+ inner_map_fds = calloc(OUTER_MAP_ENTRIES, value_size);
CHECK((!outer_map_keys || !inner_map_fds),
"Memory allocation failed for outer_map_keys or inner_map_fds",
"error=%s\n", strerror(errno));
@@ -209,6 +211,24 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
? 9 : 1000) - map_index;
+ /* This condition is only meaningful for array of maps.
+ *
+ * max_entries == OUTER_MAP_ENTRIES - 1 if it is true. Say
+ * max_entries is short for n, then outer_map_keys looks like:
+ *
+ * [n, n-1, ... 2, 1]
+ *
+ * We change it to
+ *
+ * [n, n-1, ... 2, 0]
+ *
+ * So it will leave key 1 as a hole. It will serve to test the
+ * correctness when batch on an array: a "non-exist" key might be
+ * actually allocated and returned from key iteration.
+ */
+ if (has_holes)
+ outer_map_keys[max_entries - 1]--;
+
/* batch operation - map_update */
ret = bpf_map_update_batch(outer_map_fd, outer_map_keys,
inner_map_fds, &max_entries, &opts);
@@ -219,15 +239,17 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
/* batch operation - map_lookup */
for (op_index = 0; op_index < 2; ++op_index)
fetch_and_validate(outer_map_fd, &opts,
- batch_size[op_index], false);
+ batch_size[op_index], false,
+ has_holes);
/* batch operation - map_lookup_delete */
if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
fetch_and_validate(outer_map_fd, &opts,
- max_entries, true /*delete*/);
+ max_entries, true /*delete*/,
+ has_holes);
/* close all map fds */
- for (map_index = 0; map_index < max_entries; map_index++)
+ for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++)
close(inner_map_fds[map_index]);
close(outer_map_fd);
@@ -237,16 +259,20 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
void test_map_in_map_batch_ops_array(void)
{
- _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, false);
printf("%s:PASS with inner ARRAY map\n", __func__);
- _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, false);
printf("%s:PASS with inner HASH map\n", __func__);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, true);
+ printf("%s:PASS with inner ARRAY map with holes\n", __func__);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, true);
+ printf("%s:PASS with inner HASH map with holes\n", __func__);
}
void test_map_in_map_batch_ops_hash(void)
{
- _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY, false);
printf("%s:PASS with inner ARRAY map\n", __func__);
- _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH, false);
printf("%s:PASS with inner HASH map\n", __func__);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
index 3729fbfd3084..80b153d3ddec 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
@@ -542,8 +542,12 @@ static void detach_program(struct bpf_flow *skel, int prog_fd)
static int set_port_drop(int pf, bool multi_port)
{
+ char dst_port[16];
+
+ snprintf(dst_port, sizeof(dst_port), "%d", CFG_PORT_INNER);
+
SYS(fail, "tc qdisc add dev lo ingress");
- SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s",
+ SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s %s %s",
"dev lo",
"parent FFFF:",
"protocol", pf == PF_INET6 ? "ipv6" : "ip",
@@ -551,6 +555,7 @@ static int set_port_drop(int pf, bool multi_port)
"flower",
"ip_proto udp",
"src_port", multi_port ? "8-10" : "9",
+ "dst_port", dst_port,
"action drop");
return 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 884ad87783d5..1e3e4392dcca 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -111,31 +111,35 @@ out:
static void test_sockmap_vsock_delete_on_close(void)
{
- int err, c, p, map;
- const int zero = 0;
-
- err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
- if (!ASSERT_OK(err, "create_pair(AF_VSOCK)"))
- return;
+ int map, c, p, err, zero = 0;
map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
sizeof(int), 1, NULL);
- if (!ASSERT_GE(map, 0, "bpf_map_create")) {
- close(c);
- goto out;
- }
+ if (!ASSERT_OK_FD(map, "bpf_map_create"))
+ return;
- err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST);
- close(c);
- if (!ASSERT_OK(err, "bpf_map_update"))
- goto out;
+ err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
+ if (!ASSERT_OK(err, "create_pair"))
+ goto close_map;
+
+ if (xbpf_map_update_elem(map, &zero, &c, BPF_NOEXIST))
+ goto close_socks;
+
+ xclose(c);
+ xclose(p);
+
+ err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
+ if (!ASSERT_OK(err, "create_pair"))
+ goto close_map;
- err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST);
ASSERT_OK(err, "after close(), bpf_map_update");
-out:
- close(p);
- close(map);
+close_socks:
+ xclose(c);
+ xclose(p);
+close_map:
+ xclose(map);
}
static void test_skmsg_helpers(enum bpf_map_type map_type)
@@ -522,8 +526,8 @@ static void test_sockmap_skb_verdict_shutdown(void)
if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
goto out_close;
- n = recv(c1, &b, 1, SOCK_NONBLOCK);
- ASSERT_EQ(n, 0, "recv_timeout(fin)");
+ n = recv(c1, &b, 1, MSG_DONTWAIT);
+ ASSERT_EQ(n, 0, "recv(fin)");
out_close:
close(c1);
close(p1);
@@ -531,57 +535,6 @@ out:
test_sockmap_pass_prog__destroy(skel);
}
-static void test_sockmap_stream_pass(void)
-{
- int zero = 0, sent, recvd;
- int verdict, parser;
- int err, map;
- int c = -1, p = -1;
- struct test_sockmap_pass_prog *pass = NULL;
- char snd[256] = "0123456789";
- char rcv[256] = "0";
-
- pass = test_sockmap_pass_prog__open_and_load();
- verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
- parser = bpf_program__fd(pass->progs.prog_skb_parser);
- map = bpf_map__fd(pass->maps.sock_map_rx);
-
- err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0);
- if (!ASSERT_OK(err, "bpf_prog_attach stream parser"))
- goto out;
-
- err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
- if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
- goto out;
-
- err = create_pair(AF_INET, SOCK_STREAM, &c, &p);
- if (err)
- goto out;
-
- /* sk_data_ready of 'p' will be replaced by strparser handler */
- err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
- if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
- goto out_close;
-
- /*
- * as 'prog_skb_parser' return the original skb len and
- * 'prog_skb_verdict' return SK_PASS, the kernel will just
- * pass it through to original socket 'p'
- */
- sent = xsend(c, snd, sizeof(snd), 0);
- ASSERT_EQ(sent, sizeof(snd), "xsend(c)");
-
- recvd = recv_timeout(p, rcv, sizeof(rcv), SOCK_NONBLOCK,
- IO_TIMEOUT_SEC);
- ASSERT_EQ(recvd, sizeof(rcv), "recv_timeout(p)");
-
-out_close:
- close(c);
- close(p);
-
-out:
- test_sockmap_pass_prog__destroy(pass);
-}
static void test_sockmap_skb_verdict_fionread(bool pass_prog)
{
@@ -628,7 +581,7 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
/* On DROP test there will be no data to read */
if (pass_prog) {
- recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
+ recvd = recv_timeout(c1, &buf, sizeof(buf), MSG_DONTWAIT, IO_TIMEOUT_SEC);
ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
}
@@ -1061,6 +1014,34 @@ destroy:
test_sockmap_pass_prog__destroy(skel);
}
+static void test_sockmap_vsock_unconnected(void)
+{
+ struct sockaddr_storage addr;
+ int map, s, zero = 0;
+ socklen_t alen;
+
+ map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
+ sizeof(int), 1, NULL);
+ if (!ASSERT_OK_FD(map, "bpf_map_create"))
+ return;
+
+ s = xsocket(AF_VSOCK, SOCK_STREAM, 0);
+ if (s < 0)
+ goto close_map;
+
+ /* Fail connect(), but trigger transport assignment. */
+ init_addr_loopback(AF_VSOCK, &addr, &alen);
+ if (!ASSERT_ERR(connect(s, sockaddr(&addr), alen), "connect"))
+ goto close_sock;
+
+ ASSERT_ERR(bpf_map_update_elem(map, &zero, &s, BPF_ANY), "map_update");
+
+close_sock:
+ xclose(s);
+close_map:
+ xclose(map);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -1101,8 +1082,6 @@ void test_sockmap_basic(void)
test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
if (test__start_subtest("sockmap skb_verdict shutdown"))
test_sockmap_skb_verdict_shutdown();
- if (test__start_subtest("sockmap stream parser and verdict pass"))
- test_sockmap_stream_pass();
if (test__start_subtest("sockmap skb_verdict fionread"))
test_sockmap_skb_verdict_fionread(true);
if (test__start_subtest("sockmap skb_verdict fionread on drop"))
@@ -1127,4 +1106,6 @@ void test_sockmap_basic(void)
test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH);
if (test__start_subtest("sockmap skb_verdict vsock poll"))
test_sockmap_skb_verdict_vsock_poll();
+ if (test__start_subtest("sockmap vsock unconnected"))
+ test_sockmap_vsock_unconnected();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
new file mode 100644
index 000000000000..621b3b71888e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
@@ -0,0 +1,454 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <netinet/tcp.h>
+#include <test_progs.h>
+#include "sockmap_helpers.h"
+#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_strp.skel.h"
+
+#define STRP_PKT_HEAD_LEN 4
+#define STRP_PKT_BODY_LEN 6
+#define STRP_PKT_FULL_LEN (STRP_PKT_HEAD_LEN + STRP_PKT_BODY_LEN)
+
+static const char packet[STRP_PKT_FULL_LEN] = "head+body\0";
+static const int test_packet_num = 100;
+
+/* Current implementation of tcp_bpf_recvmsg_parser() invokes data_ready
+ * with sk held if an skb exists in sk_receive_queue. Then for the
+ * data_ready implementation of strparser, it will delay the read
+ * operation if sk is held and EAGAIN is returned.
+ */
+static int sockmap_strp_consume_pre_data(int p)
+{
+ int recvd;
+ bool retried = false;
+ char rcv[10];
+
+retry:
+ errno = 0;
+ recvd = recv_timeout(p, rcv, sizeof(rcv), 0, 1);
+ if (recvd < 0 && errno == EAGAIN && retried == false) {
+ /* On the first call, EAGAIN will certainly be returned.
+ * A 1-second wait is enough for the workqueue to finish.
+ */
+ sleep(1);
+ retried = true;
+ goto retry;
+ }
+
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv error or truncated data") ||
+ !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+ "data mismatch"))
+ return -1;
+ return 0;
+}
+
+static struct test_sockmap_strp *sockmap_strp_init(int *out_map, bool pass,
+ bool need_parser)
+{
+ struct test_sockmap_strp *strp = NULL;
+ int verdict, parser;
+ int err;
+
+ strp = test_sockmap_strp__open_and_load();
+ *out_map = bpf_map__fd(strp->maps.sock_map);
+
+ if (need_parser)
+ parser = bpf_program__fd(strp->progs.prog_skb_parser_partial);
+ else
+ parser = bpf_program__fd(strp->progs.prog_skb_parser);
+
+ if (pass)
+ verdict = bpf_program__fd(strp->progs.prog_skb_verdict_pass);
+ else
+ verdict = bpf_program__fd(strp->progs.prog_skb_verdict);
+
+ err = bpf_prog_attach(parser, *out_map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach stream parser"))
+ goto err;
+
+ err = bpf_prog_attach(verdict, *out_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
+ goto err;
+
+ return strp;
+err:
+ test_sockmap_strp__destroy(strp);
+ return NULL;
+}
+
+/* Dispatch packets to different socket by packet size:
+ *
+ * ------ ------
+ * | pkt4 || pkt1 |... > remote socket
+ * ------ ------ / ------ ------
+ * | pkt8 | pkt7 |...
+ * ------ ------ \ ------ ------
+ * | pkt3 || pkt2 |... > local socket
+ * ------ ------
+ */
+static void test_sockmap_strp_dispatch_pkt(int family, int sotype)
+{
+ int i, j, zero = 0, one = 1, recvd;
+ int err, map;
+ int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+ struct test_sockmap_strp *strp = NULL;
+ int test_cnt = 6;
+ char rcv[10];
+ struct {
+ char data[7];
+ int data_len;
+ int send_cnt;
+ int *receiver;
+ } send_dir[2] = {
+ /* data expected to deliver to local */
+ {"llllll", 6, 0, &p0},
+ /* data expected to deliver to remote */
+ {"rrrrr", 5, 0, &c1}
+ };
+
+ strp = sockmap_strp_init(&map, false, false);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs()"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+ goto out_close;
+
+ err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
+ goto out_close;
+
+ err = setsockopt(c1, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero));
+ if (!ASSERT_OK(err, "setsockopt(TCP_NODELAY)"))
+ goto out_close;
+
+ /* deliver data with data size greater than 5 to local */
+ strp->data->verdict_max_size = 5;
+
+ for (i = 0; i < test_cnt; i++) {
+ int d = i % 2;
+
+ xsend(c0, send_dir[d].data, send_dir[d].data_len, 0);
+ send_dir[d].send_cnt++;
+ }
+
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < send_dir[i].send_cnt; j++) {
+ int expected = send_dir[i].data_len;
+
+ recvd = recv_timeout(*send_dir[i].receiver, rcv,
+ expected, MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, expected, "recv_timeout()"))
+ goto out_close;
+ if (!ASSERT_OK(memcmp(send_dir[i].data, rcv, recvd),
+ "data mismatch"))
+ goto out_close;
+ }
+ }
+out_close:
+ close(c0);
+ close(c1);
+ close(p0);
+ close(p1);
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* We have multiple packets in one skb
+ * ------------ ------------ ------------
+ * | packet1 | packet2 | ...
+ * ------------ ------------ ------------
+ */
+static void test_sockmap_strp_multiple_pkt(int family, int sotype)
+{
+ int i, zero = 0;
+ int sent, recvd, total;
+ int err, map;
+ int c = -1, p = -1;
+ struct test_sockmap_strp *strp = NULL;
+ char *snd = NULL, *rcv = NULL;
+
+ strp = sockmap_strp_init(&map, true, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (err)
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
+ goto out_close;
+
+ /* construct multiple packets in one buffer */
+ total = test_packet_num * STRP_PKT_FULL_LEN;
+ snd = malloc(total);
+ rcv = malloc(total + 1);
+ if (!ASSERT_TRUE(snd, "malloc(snd)") ||
+ !ASSERT_TRUE(rcv, "malloc(rcv)"))
+ goto out_close;
+
+ for (i = 0; i < test_packet_num; i++) {
+ memcpy(snd + i * STRP_PKT_FULL_LEN,
+ packet, STRP_PKT_FULL_LEN);
+ }
+
+ sent = xsend(c, snd, total, 0);
+ if (!ASSERT_EQ(sent, total, "xsend(c)"))
+ goto out_close;
+
+ /* try to recv one more byte to avoid truncation check */
+ recvd = recv_timeout(p, rcv, total + 1, MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, total, "recv(rcv)"))
+ goto out_close;
+
+ /* we sent TCP segment with multiple encapsulation
+ * then check whether packets are handled correctly
+ */
+ if (!ASSERT_OK(memcmp(snd, rcv, total), "data mismatch"))
+ goto out_close;
+
+out_close:
+ close(c);
+ close(p);
+ if (snd)
+ free(snd);
+ if (rcv)
+ free(rcv);
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* Test strparser with partial read */
+static void test_sockmap_strp_partial_read(int family, int sotype)
+{
+ int zero = 0, recvd, off;
+ int err, map;
+ int c = -1, p = -1;
+ struct test_sockmap_strp *strp = NULL;
+ char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+ strp = sockmap_strp_init(&map, true, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (err)
+ goto out;
+
+ /* sk_data_ready of 'p' will be replaced by strparser handler */
+ err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
+ goto out_close;
+
+ /* 1.1 send partial head, 1 byte header left */
+ off = STRP_PKT_HEAD_LEN - 1;
+ xsend(c, packet, off, 0);
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(-1, recvd, "partial head sent, expected no data"))
+ goto out_close;
+
+ /* 1.2 send remaining head and body */
+ xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
+ goto out_close;
+
+ /* 2.1 send partial head, 1 byte header left */
+ off = STRP_PKT_HEAD_LEN - 1;
+ xsend(c, packet, off, 0);
+
+ /* 2.2 send remaining head and partial body, 1 byte body left */
+ xsend(c, packet + off, STRP_PKT_FULL_LEN - off - 1, 0);
+ off = STRP_PKT_FULL_LEN - 1;
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(-1, recvd, "partial body sent, expected no data"))
+ goto out_close;
+
+ /* 2.3 send remaining body */
+ xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
+ goto out_close;
+
+out_close:
+ close(c);
+ close(p);
+
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* Test simple socket read/write with strparser + FIONREAD */
+static void test_sockmap_strp_pass(int family, int sotype, bool fionread)
+{
+ int zero = 0, pkt_size = STRP_PKT_FULL_LEN, sent, recvd, avail;
+ int err, map;
+ int c = -1, p = -1;
+ int test_cnt = 10, i;
+ struct test_sockmap_strp *strp = NULL;
+ char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+ strp = sockmap_strp_init(&map, true, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (err)
+ goto out;
+
+ /* inject some data before bpf process, it should be read
+ * correctly because we check sk_receive_queue in
+ * tcp_bpf_recvmsg_parser().
+ */
+ sent = xsend(c, packet, pkt_size, 0);
+ if (!ASSERT_EQ(sent, pkt_size, "xsend(pre-data)"))
+ goto out_close;
+
+ /* sk_data_ready of 'p' will be replaced by strparser handler */
+ err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
+ goto out_close;
+
+ /* consume previous data we injected */
+ if (sockmap_strp_consume_pre_data(p))
+ goto out_close;
+
+ /* Previously, we encountered issues such as deadlocks and
+ * sequence errors that resulted in the inability to read
+ * continuously. Therefore, we perform multiple iterations
+ * of testing here.
+ */
+ for (i = 0; i < test_cnt; i++) {
+ sent = xsend(c, packet, pkt_size, 0);
+ if (!ASSERT_EQ(sent, pkt_size, "xsend(c)"))
+ goto out_close;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, pkt_size, "recv_timeout(p)") ||
+ !ASSERT_OK(memcmp(packet, rcv, pkt_size),
+ "memcmp, data mismatch"))
+ goto out_close;
+ }
+
+ if (fionread) {
+ sent = xsend(c, packet, pkt_size, 0);
+ if (!ASSERT_EQ(sent, pkt_size, "second xsend(c)"))
+ goto out_close;
+
+ err = ioctl(p, FIONREAD, &avail);
+ if (!ASSERT_OK(err, "ioctl(FIONREAD) error") ||
+ !ASSERT_EQ(avail, pkt_size, "ioctl(FIONREAD)"))
+ goto out_close;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, pkt_size, "second recv_timeout(p)") ||
+ !ASSERT_OK(memcmp(packet, rcv, pkt_size),
+ "second memcmp, data mismatch"))
+ goto out_close;
+ }
+
+out_close:
+ close(c);
+ close(p);
+
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* Test strparser with verdict mode */
+static void test_sockmap_strp_verdict(int family, int sotype)
+{
+ int zero = 0, one = 1, sent, recvd, off;
+ int err, map;
+ int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+ struct test_sockmap_strp *strp = NULL;
+ char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+ strp = sockmap_strp_init(&map, false, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ /* We simulate a reverse proxy server.
+ * When p0 receives data from c0, we forward it to c1.
+ * From c1's perspective, it will consider this data
+ * as being sent by p1.
+ */
+ err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs()"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+ goto out_close;
+
+ err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
+ goto out_close;
+
+ sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
+ if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "xsend(c0)"))
+ goto out_close;
+
+ recvd = recv_timeout(c1, rcv, sizeof(rcv), MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv_timeout(c1)") ||
+ !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+ "received data does not match the sent data"))
+ goto out_close;
+
+ /* send again to ensure the stream is functioning correctly. */
+ sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
+ if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "second xsend(c0)"))
+ goto out_close;
+
+ /* partial read */
+ off = STRP_PKT_FULL_LEN / 2;
+ recvd = recv_timeout(c1, rcv, off, MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ recvd += recv_timeout(c1, rcv + off, sizeof(rcv) - off, MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "partial recv_timeout(c1)") ||
+ !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+ "partial received data does not match the sent data"))
+ goto out_close;
+
+out_close:
+ close(c0);
+ close(c1);
+ close(p0);
+ close(p1);
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+void test_sockmap_strp(void)
+{
+ if (test__start_subtest("sockmap strp tcp pass"))
+ test_sockmap_strp_pass(AF_INET, SOCK_STREAM, false);
+ if (test__start_subtest("sockmap strp tcp v6 pass"))
+ test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, false);
+ if (test__start_subtest("sockmap strp tcp pass fionread"))
+ test_sockmap_strp_pass(AF_INET, SOCK_STREAM, true);
+ if (test__start_subtest("sockmap strp tcp v6 pass fionread"))
+ test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, true);
+ if (test__start_subtest("sockmap strp tcp verdict"))
+ test_sockmap_strp_verdict(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp v6 verdict"))
+ test_sockmap_strp_verdict(AF_INET6, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp partial read"))
+ test_sockmap_strp_partial_read(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp multiple packets"))
+ test_sockmap_strp_multiple_pkt(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp dispatch"))
+ test_sockmap_strp_dispatch_pkt(AF_INET, SOCK_STREAM);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index c7f74f068e78..df27535995af 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -52,10 +52,10 @@ static void test_xdp_with_cpumap_helpers(void)
ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
/* send a packet to trigger any potential bugs in there */
- char data[10] = {};
+ char data[ETH_HLEN] = {};
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = 10,
+ .data_size_in = sizeof(data),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 27ffed17d4be..461ab18705d5 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -23,7 +23,7 @@ static void test_xdp_with_devmap_helpers(void)
__u32 len = sizeof(info);
int err, dm_fd, dm_fd_redir, map_fd;
struct nstoken *nstoken = NULL;
- char data[10] = {};
+ char data[ETH_HLEN] = {};
__u32 idx = 0;
SYS(out_close, "ip netns add %s", TEST_NS);
@@ -58,7 +58,7 @@ static void test_xdp_with_devmap_helpers(void)
/* send a packet to trigger any potential bugs in there */
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = 10,
+ .data_size_in = sizeof(data),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
@@ -158,7 +158,7 @@ static void test_xdp_with_devmap_helpers_veth(void)
struct nstoken *nstoken = NULL;
__u32 len = sizeof(info);
int err, dm_fd, dm_fd_redir, map_fd, ifindex_dst;
- char data[10] = {};
+ char data[ETH_HLEN] = {};
__u32 idx = 0;
SYS(out_close, "ip netns add %s", TEST_NS);
@@ -208,7 +208,7 @@ static void test_xdp_with_devmap_helpers_veth(void)
/* send a packet to trigger any potential bugs in there */
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = 10,
+ .data_size_in = sizeof(data),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_strp.c b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
new file mode 100644
index 000000000000..dde3d5bec515
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+int verdict_max_size = 10000;
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map SEC(".maps");
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ __u32 one = 1;
+
+ if (skb->len > verdict_max_size)
+ return SK_PASS;
+
+ return bpf_sk_redirect_map(skb, &sock_map, one, 0);
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict_pass(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser_partial(struct __sk_buff *skb)
+{
+ /* agreement with the test program on a 4-byte size header
+ * and 6-byte body.
+ */
+ if (skb->len < 4) {
+ /* need more header to determine full length */
+ return 0;
+ }
+ /* return full length decoded from header.
+ * the return value may be larger than skb->len which
+ * means framework must wait body coming.
+ */
+ return 10;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c
index 29eb9568633f..0a187ff725cc 100644
--- a/tools/testing/selftests/bpf/progs/verifier_array_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_array_access.c
@@ -713,4 +713,19 @@ unsigned int non_stack_key_lookup(void)
return val->index;
}
+SEC("socket")
+__description("doesn't reject UINT64_MAX as s64 for irrelevant maps")
+__success __retval(42)
+unsigned int doesnt_reject_irrelevant_maps(void)
+{
+ __u64 key = 0xFFFFFFFFFFFFFFFF;
+ struct test_val *val;
+
+ val = bpf_map_lookup_elem(&map_hash_48b, &key);
+ if (val)
+ return val->index;
+
+ return 42;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh
index 3f45512fb512..7406c24be1ac 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Test the special cpuset v1 hotplug case where a cpuset become empty of
diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py
index 2e8a74aff543..58f3291fed12 100755
--- a/tools/testing/selftests/damon/damon_nr_regions.py
+++ b/tools/testing/selftests/damon/damon_nr_regions.py
@@ -65,6 +65,7 @@ def test_nr_regions(real_nr_regions, min_nr_regions, max_nr_regions):
test_name = 'nr_regions test with %d/%d/%d real/min/max nr_regions' % (
real_nr_regions, min_nr_regions, max_nr_regions)
+ collected_nr_regions.sort()
if (collected_nr_regions[0] < min_nr_regions or
collected_nr_regions[-1] > max_nr_regions):
print('fail %s' % test_name)
@@ -109,6 +110,7 @@ def main():
attrs = kdamonds.kdamonds[0].contexts[0].monitoring_attrs
attrs.min_nr_regions = 3
attrs.max_nr_regions = 7
+ attrs.update_us = 100000
err = kdamonds.kdamonds[0].commit()
if err is not None:
proc.terminate()
diff --git a/tools/testing/selftests/damon/damos_quota.py b/tools/testing/selftests/damon/damos_quota.py
index 7d4c6bb2e3cd..57c4937aaed2 100755
--- a/tools/testing/selftests/damon/damos_quota.py
+++ b/tools/testing/selftests/damon/damos_quota.py
@@ -51,16 +51,19 @@ def main():
nr_quota_exceeds = scheme.stats.qt_exceeds
wss_collected.sort()
+ nr_expected_quota_exceeds = 0
for wss in wss_collected:
if wss > sz_quota:
print('quota is not kept: %s > %s' % (wss, sz_quota))
print('collected samples are as below')
print('\n'.join(['%d' % wss for wss in wss_collected]))
exit(1)
+ if wss == sz_quota:
+ nr_expected_quota_exceeds += 1
- if nr_quota_exceeds < len(wss_collected):
- print('quota is not always exceeded: %d > %d' %
- (len(wss_collected), nr_quota_exceeds))
+ if nr_quota_exceeds < nr_expected_quota_exceeds:
+ print('quota is exceeded less than expected: %d < %d' %
+ (nr_quota_exceeds, nr_expected_quota_exceeds))
exit(1)
if __name__ == '__main__':
diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py
index 18246f3b62f7..f76e0412b564 100755
--- a/tools/testing/selftests/damon/damos_quota_goal.py
+++ b/tools/testing/selftests/damon/damos_quota_goal.py
@@ -63,6 +63,9 @@ def main():
if last_effective_bytes != 0 else -1.0))
if last_effective_bytes == goal.effective_bytes:
+ # effective quota was already minimum that cannot be more reduced
+ if expect_increase is False and last_effective_bytes == 1:
+ continue
print('efective bytes not changed: %d' % goal.effective_bytes)
exit(1)
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index edc56e2cc606..7bc148889ca7 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -11,8 +11,8 @@ ALL_TESTS="
lib_dir=$(dirname "$0")
source ${lib_dir}/bond_topo_3d1c.sh
-c_maddr="33:33:00:00:00:10"
-g_maddr="33:33:00:00:02:54"
+c_maddr="33:33:ff:00:00:10"
+g_maddr="33:33:ff:00:02:54"
skip_prio()
{
diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py
index 394971b25c0b..873f5219e41d 100755
--- a/tools/testing/selftests/drivers/net/hds.py
+++ b/tools/testing/selftests/drivers/net/hds.py
@@ -2,17 +2,54 @@
# SPDX-License-Identifier: GPL-2.0
import errno
+import os
from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx
-from lib.py import EthtoolFamily, NlError
+from lib.py import CmdExitFailure, EthtoolFamily, NlError
from lib.py import NetDrvEnv
+from lib.py import defer, ethtool, ip
-def get_hds(cfg, netnl) -> None:
+
+def _get_hds_mode(cfg, netnl) -> str:
try:
rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
except NlError as e:
raise KsftSkipEx('ring-get not supported by device')
if 'tcp-data-split' not in rings:
raise KsftSkipEx('tcp-data-split not supported by device')
+ return rings['tcp-data-split']
+
+
+def _xdp_onoff(cfg):
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ ip("link set dev %s xdp obj %s sec xdp" %
+ (cfg.ifname, prog))
+ ip("link set dev %s xdp off" % cfg.ifname)
+
+
+def _ioctl_ringparam_modify(cfg, netnl) -> None:
+ """
+ Helper for performing a hopefully unimportant IOCTL SET.
+ IOCTL does not support HDS, so it should not affect the HDS config.
+ """
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+
+ if 'tx' not in rings:
+ raise KsftSkipEx('setting Tx ring size not supported')
+
+ try:
+ ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] // 2}")
+ except CmdExitFailure as e:
+ ethtool(f"--disable-netlink -G {cfg.ifname} tx {rings['tx'] * 2}")
+ defer(ethtool, f"-G {cfg.ifname} tx {rings['tx']}")
+
+
+def get_hds(cfg, netnl) -> None:
+ _get_hds_mode(cfg, netnl)
+
def get_hds_thresh(cfg, netnl) -> None:
try:
@@ -104,6 +141,103 @@ def set_hds_thresh_gt(cfg, netnl) -> None:
netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt})
ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+def set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "auto" / UNKNOWN mode, XDP installation should work.
+ """
+ mode = _get_hds_mode(cfg, netnl)
+ if mode == 'enabled':
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ _xdp_onoff(cfg)
+
+
+def enabled_set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "enabled" mode, XDP installation should not work.
+ """
+ _get_hds_mode(cfg, netnl)
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled'})
+
+ defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ with ksft_raises(CmdExitFailure) as e:
+ _xdp_onoff(cfg)
+
+
+def set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "auto" / UNKNOWN mode, XDP installation should work.
+ """
+ mode = _get_hds_mode(cfg, netnl)
+ if mode == 'enabled':
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ _xdp_onoff(cfg)
+
+
+def enabled_set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "enabled" mode, XDP installation should not work.
+ """
+ _get_hds_mode(cfg, netnl) # Trigger skip if not supported
+
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled'})
+ defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ with ksft_raises(CmdExitFailure) as e:
+ _xdp_onoff(cfg)
+
+
+def ioctl(cfg, netnl) -> None:
+ mode1 = _get_hds_mode(cfg, netnl)
+ _ioctl_ringparam_modify(cfg, netnl)
+ mode2 = _get_hds_mode(cfg, netnl)
+
+ ksft_eq(mode1, mode2)
+
+
+def ioctl_set_xdp(cfg, netnl) -> None:
+ """
+ Like set_xdp(), but we perturb the settings via the legacy ioctl.
+ """
+ mode = _get_hds_mode(cfg, netnl)
+ if mode == 'enabled':
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ _ioctl_ringparam_modify(cfg, netnl)
+
+ _xdp_onoff(cfg)
+
+
+def ioctl_enabled_set_xdp(cfg, netnl) -> None:
+ """
+ Enable single-buffer XDP on the device.
+ When HDS is in "enabled" mode, XDP installation should not work.
+ """
+ _get_hds_mode(cfg, netnl) # Trigger skip if not supported
+
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'enabled'})
+ defer(netnl.rings_set, {'header': {'dev-index': cfg.ifindex},
+ 'tcp-data-split': 'unknown'})
+
+ with ksft_raises(CmdExitFailure) as e:
+ _xdp_onoff(cfg)
+
+
def main() -> None:
with NetDrvEnv(__file__, queue_count=3) as cfg:
ksft_run([get_hds,
@@ -112,7 +246,12 @@ def main() -> None:
set_hds_enable,
set_hds_thresh_zero,
set_hds_thresh_max,
- set_hds_thresh_gt],
+ set_hds_thresh_gt,
+ set_xdp,
+ enabled_set_xdp,
+ ioctl,
+ ioctl_set_xdp,
+ ioctl_enabled_set_xdp],
args=(cfg, EthtoolFamily()))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
index eb83e7b48797..fc69bfcc37c4 100755
--- a/tools/testing/selftests/drivers/net/ping.py
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -1,49 +1,219 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+import os
+import random, string, time
from lib.py import ksft_run, ksft_exit
-from lib.py import ksft_eq
-from lib.py import NetDrvEpEnv
+from lib.py import ksft_eq, KsftSkipEx, KsftFailEx
+from lib.py import EthtoolFamily, NetDrvEpEnv
from lib.py import bkg, cmd, wait_port_listen, rand_port
+from lib.py import defer, ethtool, ip
+remote_ifname=""
+no_sleep=False
-def test_v4(cfg) -> None:
+def _test_v4(cfg) -> None:
cfg.require_v4()
cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}")
cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote)
+ cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v4}")
+ cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.v4}", host=cfg.remote)
-
-def test_v6(cfg) -> None:
+def _test_v6(cfg) -> None:
cfg.require_v6()
- cmd(f"ping -c 1 -W0.5 {cfg.remote_v6}")
- cmd(f"ping -c 1 -W0.5 {cfg.v6}", host=cfg.remote)
-
+ cmd(f"ping -c 1 -W5 {cfg.remote_v6}")
+ cmd(f"ping -c 1 -W5 {cfg.v6}", host=cfg.remote)
+ cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.remote_v6}")
+ cmd(f"ping -s 65000 -c 1 -W0.5 {cfg.v6}", host=cfg.remote)
-def test_tcp(cfg) -> None:
+def _test_tcp(cfg) -> None:
cfg.require_cmd("socat", remote=True)
port = rand_port()
listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT"
+ test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536))
with bkg(listen_cmd, exit_wait=True) as nc:
wait_port_listen(port)
- cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}",
+ cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}",
shell=True, host=cfg.remote)
- ksft_eq(nc.stdout.strip(), "ping")
+ ksft_eq(nc.stdout.strip(), test_string)
+ test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(65536))
with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc:
wait_port_listen(port, host=cfg.remote)
- cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
- ksft_eq(nc.stdout.strip(), "ping")
-
+ cmd(f"echo {test_string} | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
+ ksft_eq(nc.stdout.strip(), test_string)
+
+def _set_offload_checksum(cfg, netnl, on) -> None:
+ try:
+ ethtool(f" -K {cfg.ifname} rx {on} tx {on} ")
+ except:
+ return
+
+def _set_xdp_generic_sb_on(cfg) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True)
+ defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off")
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_generic_mb_on(cfg) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote)
+ ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog))
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off")
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_native_sb_on(cfg) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+ cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True)
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+ xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
+ if xdp_info['xdp']['mode'] != 1:
+ """
+ If the interface doesn't support native-mode, it falls back to generic mode.
+ The mode value 1 is native and 2 is generic.
+ So it raises an exception if mode is not 1(native mode).
+ """
+ raise KsftSkipEx('device does not support native-XDP')
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_native_mb_on(cfg) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote)
+ defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote)
+ try:
+ cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True)
+ defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off")
+ except Exception as e:
+ raise KsftSkipEx('device does not support native-multi-buffer XDP')
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def _set_xdp_offload_on(cfg) -> None:
+ test_dir = os.path.dirname(os.path.realpath(__file__))
+ prog = test_dir + "/../../net/lib/xdp_dummy.bpf.o"
+ cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True)
+ try:
+ cmd(f"ip link set dev {cfg.ifname} xdpoffload obj {prog} sec xdp", shell=True)
+ except Exception as e:
+ raise KsftSkipEx('device does not support offloaded XDP')
+ defer(ip, f"link set dev {cfg.ifname} xdpoffload off")
+ cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+ if no_sleep != True:
+ time.sleep(10)
+
+def get_interface_info(cfg) -> None:
+ global remote_ifname
+ global no_sleep
+
+ remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_v4} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout
+ remote_ifname = remote_info.rstrip('\n')
+ if remote_ifname == "":
+ raise KsftFailEx('Can not get remote interface')
+ local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0]
+ if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim":
+ no_sleep=True
+ if 'linkinfo' in local_info and local_info['linkinfo']['info_kind'] == "veth":
+ no_sleep=True
+
+def set_interface_init(cfg) -> None:
+ cmd(f"ip link set dev {cfg.ifname} mtu 1500", shell=True)
+ cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True)
+ cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True)
+ cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True)
+ cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote)
+
+def test_default(cfg, netnl) -> None:
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_generic_sb(cfg, netnl) -> None:
+ _set_xdp_generic_sb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_generic_mb(cfg, netnl) -> None:
+ _set_xdp_generic_mb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_native_sb(cfg, netnl) -> None:
+ _set_xdp_native_sb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_native_mb(cfg, netnl) -> None:
+ _set_xdp_native_mb_on(cfg)
+ _set_offload_checksum(cfg, netnl, "off")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+ _set_offload_checksum(cfg, netnl, "on")
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
+
+def test_xdp_offload(cfg, netnl) -> None:
+ _set_xdp_offload_on(cfg)
+ _test_v4(cfg)
+ _test_v6(cfg)
+ _test_tcp(cfg)
def main() -> None:
with NetDrvEpEnv(__file__) as cfg:
- ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ get_interface_info(cfg)
+ set_interface_init(cfg)
+ ksft_run([test_default,
+ test_xdp_generic_sb,
+ test_xdp_generic_mb,
+ test_xdp_native_sb,
+ test_xdp_native_mb,
+ test_xdp_offload],
+ args=(cfg, EthtoolFamily()))
ksft_exit()
diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
index 38303da957ee..8a518905a9f9 100755
--- a/tools/testing/selftests/drivers/net/queues.py
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -45,10 +45,9 @@ def addremove_queues(cfg, nl) -> None:
netnl = EthtoolFamily()
channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}})
- if channels['combined-count'] == 0:
- rx_type = 'rx'
- else:
- rx_type = 'combined'
+ rx_type = 'rx'
+ if channels.get('combined-count', 0) > 0:
+ rx_type = 'combined'
expected = curr_queues - 1
cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10)
diff --git a/tools/testing/selftests/filesystems/mount-notify/.gitignore b/tools/testing/selftests/filesystems/mount-notify/.gitignore
new file mode 100644
index 000000000000..82a4846cbc4b
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/*_test
diff --git a/tools/testing/selftests/filesystems/mount-notify/Makefile b/tools/testing/selftests/filesystems/mount-notify/Makefile
new file mode 100644
index 000000000000..10be0227b5ae
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
+TEST_GEN_PROGS := mount-notify_test
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
new file mode 100644
index 000000000000..4a2d5c454fd1
--- /dev/null
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
@@ -0,0 +1,516 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <linux/fanotify.h>
+#include <unistd.h>
+#include <sys/fanotify.h>
+#include <sys/syscall.h>
+
+#include "../../kselftest_harness.h"
+#include "../statmount/statmount.h"
+
+#ifndef FAN_MNT_ATTACH
+struct fanotify_event_info_mnt {
+ struct fanotify_event_info_header hdr;
+ __u64 mnt_id;
+};
+#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */
+#endif
+
+#ifndef FAN_MNT_DETACH
+#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */
+#endif
+
+#ifndef FAN_REPORT_MNT
+#define FAN_REPORT_MNT 0x00004000 /* Report mount events */
+#endif
+
+#ifndef FAN_MARK_MNTNS
+#define FAN_MARK_MNTNS 0x00000110
+#endif
+
+static uint64_t get_mnt_id(struct __test_metadata *const _metadata,
+ const char *path)
+{
+ struct statx sx;
+
+ ASSERT_EQ(statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx), 0);
+ ASSERT_TRUE(!!(sx.stx_mask & STATX_MNT_ID_UNIQUE));
+ return sx.stx_mnt_id;
+}
+
+static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
+
+FIXTURE(fanotify) {
+ int fan_fd;
+ char buf[256];
+ unsigned int rem;
+ void *next;
+ char root_mntpoint[sizeof(root_mntpoint_templ)];
+ int orig_root;
+ int ns_fd;
+ uint64_t root_id;
+};
+
+FIXTURE_SETUP(fanotify)
+{
+ int ret;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
+ ASSERT_GE(self->ns_fd, 0);
+
+ ASSERT_EQ(mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL), 0);
+
+ strcpy(self->root_mntpoint, root_mntpoint_templ);
+ ASSERT_NE(mkdtemp(self->root_mntpoint), NULL);
+
+ self->orig_root = open("/", O_PATH | O_CLOEXEC);
+ ASSERT_GE(self->orig_root, 0);
+
+ ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0);
+
+ ASSERT_EQ(chroot(self->root_mntpoint), 0);
+
+ ASSERT_EQ(chdir("/"), 0);
+
+ ASSERT_EQ(mkdir("a", 0700), 0);
+
+ ASSERT_EQ(mkdir("b", 0700), 0);
+
+ self->root_id = get_mnt_id(_metadata, "/");
+ ASSERT_NE(self->root_id, 0);
+
+ self->fan_fd = fanotify_init(FAN_REPORT_MNT, 0);
+ ASSERT_GE(self->fan_fd, 0);
+
+ ret = fanotify_mark(self->fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH, self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+
+ self->rem = 0;
+}
+
+FIXTURE_TEARDOWN(fanotify)
+{
+ ASSERT_EQ(self->rem, 0);
+ close(self->fan_fd);
+
+ ASSERT_EQ(fchdir(self->orig_root), 0);
+
+ ASSERT_EQ(chroot("."), 0);
+
+ EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0);
+ EXPECT_EQ(chdir(self->root_mntpoint), 0);
+ EXPECT_EQ(chdir("/"), 0);
+ EXPECT_EQ(rmdir(self->root_mntpoint), 0);
+}
+
+static uint64_t expect_notify(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t *mask)
+{
+ struct fanotify_event_metadata *meta;
+ struct fanotify_event_info_mnt *mnt;
+ unsigned int thislen;
+
+ if (!self->rem) {
+ ssize_t len = read(self->fan_fd, self->buf, sizeof(self->buf));
+ ASSERT_GT(len, 0);
+
+ self->rem = len;
+ self->next = (void *) self->buf;
+ }
+
+ meta = self->next;
+ ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem));
+
+ thislen = meta->event_len;
+ self->rem -= thislen;
+ self->next += thislen;
+
+ *mask = meta->mask;
+ thislen -= sizeof(*meta);
+
+ mnt = ((void *) meta) + meta->event_len - thislen;
+
+ ASSERT_EQ(thislen, sizeof(*mnt));
+
+ return mnt->mnt_id;
+}
+
+static void expect_notify_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ unsigned int n, uint64_t mask[], uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify(_metadata, self, &mask[i]);
+}
+
+static uint64_t expect_notify_mask(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t expect_mask)
+{
+ uint64_t mntid, mask;
+
+ mntid = expect_notify(_metadata, self, &mask);
+ ASSERT_EQ(expect_mask, mask);
+
+ return mntid;
+}
+
+
+static void expect_notify_mask_n(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(fanotify) *self,
+ uint64_t mask, unsigned int n, uint64_t mnts[])
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ mnts[i] = expect_notify_mask(_metadata, self, mask);
+}
+
+static void verify_mount_ids(struct __test_metadata *const _metadata,
+ const uint64_t list1[], const uint64_t list2[],
+ size_t num)
+{
+ unsigned int i, j;
+
+ // Check that neither list has any duplicates
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (i != j) {
+ ASSERT_NE(list1[i], list1[j]);
+ ASSERT_NE(list2[i], list2[j]);
+ }
+ }
+ }
+ // Check that all list1 memebers can be found in list2. Together with
+ // the above it means that the list1 and list2 represent the same sets.
+ for (i = 0; i < num; i++) {
+ for (j = 0; j < num; j++) {
+ if (list1[i] == list2[j])
+ break;
+ }
+ ASSERT_NE(j, num);
+ }
+}
+
+static void check_mounted(struct __test_metadata *const _metadata,
+ const uint64_t mnts[], size_t num)
+{
+ ssize_t ret;
+ uint64_t *list;
+
+ list = malloc((num + 1) * sizeof(list[0]));
+ ASSERT_NE(list, NULL);
+
+ ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0);
+ ASSERT_EQ(ret, num);
+
+ verify_mount_ids(_metadata, mnts, list, num);
+
+ free(list);
+}
+
+static void setup_mount_tree(struct __test_metadata *const _metadata,
+ int log2_num)
+{
+ int ret, i;
+
+ ret = mount("", "/", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ for (i = 0; i < log2_num; i++) {
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+TEST_F(fanotify, bind)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, move)
+{
+ int ret;
+ uint64_t mnts[2] = { self->root_id };
+ uint64_t move_id;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0);
+ ASSERT_EQ(ret, 0);
+
+ move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ ASSERT_EQ(move_id, mnts[1]);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, propagate)
+{
+ const unsigned int log2_num = 4;
+ const unsigned int num = (1 << log2_num);
+ uint64_t mnts[num];
+
+ setup_mount_tree(_metadata, log2_num);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1);
+
+ mnts[0] = self->root_id;
+ check_mounted(_metadata, mnts, num);
+
+ // Cleanup
+ int ret;
+ uint64_t mnts2[num];
+ ret = umount2("/", MNT_DETACH);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/", NULL, MS_PRIVATE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts2[0] = self->root_id;
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1);
+ verify_mount_ids(_metadata, mnts, mnts2, num);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, fsmount)
+{
+ int ret, fs, mnt;
+ uint64_t mnts[2] = { self->root_id };
+
+ fs = fsopen("tmpfs", 0);
+ ASSERT_GE(fs, 0);
+
+ ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0);
+ ASSERT_EQ(ret, 0);
+
+ mnt = fsmount(fs, 0, 0);
+ ASSERT_GE(mnt, 0);
+
+ close(fs);
+
+ ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH);
+ ASSERT_EQ(ret, 0);
+
+ close(mnt);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ ASSERT_NE(mnts[0], mnts[1]);
+
+ check_mounted(_metadata, mnts, 2);
+
+ // Cleanup
+ uint64_t detach_id;
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
+ ASSERT_EQ(detach_id, mnts[1]);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, reparent)
+{
+ uint64_t mnts[6] = { self->root_id };
+ uint64_t dmnts[3];
+ uint64_t masks[3];
+ unsigned int i;
+ int ret;
+
+ // Create setup with a[1] -> b[2] propagation
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/a", NULL, MS_SHARED, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("", "/b", NULL, MS_SLAVE, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ // Mount on a[3], which is propagated to b[4]
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3);
+
+ check_mounted(_metadata, mnts, 5);
+
+ // Mount on b[5], not propagated
+ ret = mount("/", "/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ check_mounted(_metadata, mnts, 6);
+
+ // Umount a[3], which is propagated to b[4], but not b[5]
+ // This will result in b[5] "falling" on b[2]
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_n(_metadata, self, 3, masks, dmnts);
+ verify_mount_ids(_metadata, mnts + 3, dmnts, 3);
+
+ for (i = 0; i < 3; i++) {
+ if (dmnts[i] == mnts[5]) {
+ ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH);
+ } else {
+ ASSERT_EQ(masks[i], FAN_MNT_DETACH);
+ }
+ }
+
+ mnts[3] = mnts[5];
+ check_mounted(_metadata, mnts, 4);
+
+ // Cleanup
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/b");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts);
+ verify_mount_ids(_metadata, mnts + 1, dmnts, 3);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_F(fanotify, rmdir)
+{
+ uint64_t mnts[3] = { self->root_id };
+ int ret;
+
+ ret = mount("/", "/a", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/", "/a/b", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
+
+ check_mounted(_metadata, mnts, 3);
+
+ ret = chdir("/a");
+ ASSERT_EQ(ret, 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret == 0) {
+ chdir("/");
+ unshare(CLONE_NEWNS);
+ mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ umount2("/a", MNT_DETACH);
+ // This triggers a detach in the other namespace
+ rmdir("/a");
+ exit(0);
+ }
+ wait(NULL);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1);
+ check_mounted(_metadata, mnts, 1);
+
+ // Cleanup
+ ret = chdir("/");
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(fanotify, pivot_root)
+{
+ uint64_t mnts[3] = { self->root_id };
+ uint64_t mnts2[3];
+ int ret;
+
+ ret = mount("tmpfs", "/a", "tmpfs", 0, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+
+ ret = mkdir("/a/new", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mkdir("/a/old", 0700);
+ ASSERT_EQ(ret, 0);
+
+ ret = mount("/a", "/a/new", NULL, MS_BIND, NULL);
+ ASSERT_EQ(ret, 0);
+
+ mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
+ check_mounted(_metadata, mnts, 3);
+
+ ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old");
+ ASSERT_EQ(ret, 0);
+
+ expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2);
+ verify_mount_ids(_metadata, mnts, mnts2, 2);
+ check_mounted(_metadata, mnts, 3);
+
+ // Cleanup
+ ret = syscall(SYS_pivot_root, "/old", "/old/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a/new");
+ ASSERT_EQ(ret, 0);
+
+ ret = umount("/a");
+ ASSERT_EQ(ret, 0);
+
+ check_mounted(_metadata, mnts, 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
index 457cf76f3c5f..a3d8015897e9 100644
--- a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
+++ b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
@@ -3,6 +3,8 @@
#define _GNU_SOURCE
#include <fcntl.h>
+#include <linux/auto_dev-ioctl.h>
+#include <linux/errno.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
@@ -146,4 +148,16 @@ TEST_F(iterate_mount_namespaces, iterate_backward)
}
}
+TEST_F(iterate_mount_namespaces, nfs_valid_ioctl)
+{
+ ASSERT_NE(ioctl(self->fd_mnt_ns[0], AUTOFS_DEV_IOCTL_OPENMOUNT, NULL), 0);
+ ASSERT_EQ(errno, ENOTTY);
+
+ ASSERT_NE(ioctl(self->fd_mnt_ns[0], AUTOFS_DEV_IOCTL_CLOSEMOUNT, NULL), 0);
+ ASSERT_EQ(errno, ENOTTY);
+
+ ASSERT_NE(ioctl(self->fd_mnt_ns[0], AUTOFS_DEV_IOCTL_READY, NULL), 0);
+ ASSERT_EQ(errno, ENOTTY);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile
index e8d1adb021af..6c661232b3b5 100644
--- a/tools/testing/selftests/filesystems/overlayfs/Makefile
+++ b/tools/testing/selftests/filesystems/overlayfs/Makefile
@@ -1,7 +1,14 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := dev_in_maps set_layers_via_fds
+CFLAGS += -Wall
+CFLAGS += $(KHDR_INCLUDES)
+LDLIBS += -lcap
-CFLAGS := -Wall -Werror
+LOCAL_HDRS += wrappers.h log.h
+
+TEST_GEN_PROGS := dev_in_maps
+TEST_GEN_PROGS += set_layers_via_fds
include ../../lib.mk
+
+$(OUTPUT)/set_layers_via_fds: ../utils.c
diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
index 1d0ae785a667..5074e64e74a8 100644
--- a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
+++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c
@@ -6,26 +6,40 @@
#include <sched.h>
#include <stdio.h>
#include <string.h>
+#include <sys/socket.h>
#include <sys/stat.h>
+#include <sys/sysmacros.h>
#include <sys/mount.h>
#include <unistd.h>
#include "../../kselftest_harness.h"
+#include "../../pidfd/pidfd.h"
#include "log.h"
+#include "../utils.h"
#include "wrappers.h"
FIXTURE(set_layers_via_fds) {
+ int pidfd;
};
FIXTURE_SETUP(set_layers_via_fds)
{
- ASSERT_EQ(mkdir("/set_layers_via_fds", 0755), 0);
+ self->pidfd = -EBADF;
+ EXPECT_EQ(mkdir("/set_layers_via_fds", 0755), 0);
+ EXPECT_EQ(mkdir("/set_layers_via_fds_tmpfs", 0755), 0);
}
FIXTURE_TEARDOWN(set_layers_via_fds)
{
+ if (self->pidfd >= 0) {
+ EXPECT_EQ(sys_pidfd_send_signal(self->pidfd, SIGKILL, NULL, 0), 0);
+ EXPECT_EQ(close(self->pidfd), 0);
+ }
umount2("/set_layers_via_fds", 0);
- ASSERT_EQ(rmdir("/set_layers_via_fds"), 0);
+ EXPECT_EQ(rmdir("/set_layers_via_fds"), 0);
+
+ umount2("/set_layers_via_fds_tmpfs", 0);
+ EXPECT_EQ(rmdir("/set_layers_via_fds_tmpfs"), 0);
}
TEST_F(set_layers_via_fds, set_layers_via_fds)
@@ -214,4 +228,493 @@ TEST_F(set_layers_via_fds, set_500_layers_via_fds)
ASSERT_EQ(close(fd_overlay), 0);
}
+TEST_F(set_layers_via_fds, set_override_creds)
+{
+ int fd_context, fd_tmpfs, fd_overlay;
+ int layer_fds[] = { [0 ... 3] = -EBADF };
+ pid_t pid;
+ int pidfd;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+
+ layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+ ASSERT_GE(layer_fds[3], 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "nooverride_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_GE(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_override_creds_invalid)
+{
+ int fd_context, fd_tmpfs, fd_overlay, ret;
+ int layer_fds[] = { [0 ... 3] = -EBADF };
+ pid_t pid;
+ int fd_userns1, fd_userns2;
+ int ipc_sockets[2];
+ char c;
+ const unsigned int predictable_fd_context_nr = 123;
+
+ fd_userns1 = get_userns_fd(0, 0, 10000);
+ ASSERT_GE(fd_userns1, 0);
+
+ fd_userns2 = get_userns_fd(0, 1234, 10000);
+ ASSERT_GE(fd_userns2, 0);
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ ASSERT_GE(ret, 0);
+
+ pid = create_child(&self->pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (close(ipc_sockets[0])) {
+ TH_LOG("close should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!switch_userns(fd_userns2, 0, 0, false)) {
+ TH_LOG("switch_userns should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (read_nointr(ipc_sockets[1], &c, 1) != 1) {
+ TH_LOG("read_nointr should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (close(ipc_sockets[1])) {
+ TH_LOG("close should have succeeded");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (!sys_fsconfig(predictable_fd_context_nr, FSCONFIG_SET_FLAG, "override_creds", NULL, 0)) {
+ TH_LOG("sys_fsconfig should have failed");
+ _exit(EXIT_FAILURE);
+ }
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ ASSERT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(switch_userns(fd_userns1, 0, 0, false), true);
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+
+ layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+ ASSERT_GE(layer_fds[3], 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+ ASSERT_EQ(dup3(fd_context, predictable_fd_context_nr, 0), predictable_fd_context_nr);
+ ASSERT_EQ(close(fd_context), 0);
+ fd_context = predictable_fd_context_nr;
+ ASSERT_EQ(write_nointr(ipc_sockets[0], "1", 1), 1);
+ ASSERT_EQ(close(ipc_sockets[0]), 0);
+
+ ASSERT_EQ(wait_for_pid(pid), 0);
+ ASSERT_EQ(close(self->pidfd), 0);
+ self->pidfd = -EBADF;
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++)
+ ASSERT_EQ(close(layer_fds[i]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "userxattr", NULL, 0), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+ ASSERT_EQ(close(fd_userns1), 0);
+ ASSERT_EQ(close(fd_userns2), 0);
+}
+
+TEST_F(set_layers_via_fds, set_override_creds_nomknod)
+{
+ int fd_context, fd_tmpfs, fd_overlay;
+ int layer_fds[] = { [0 ... 3] = -EBADF };
+ pid_t pid;
+ int pidfd;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+
+ layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY);
+ ASSERT_GE(layer_fds[3], 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "userxattr", NULL, 0), 0);
+
+ pid = create_child(&pidfd, 0);
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ if (!cap_down(CAP_MKNOD))
+ _exit(EXIT_FAILURE);
+
+ if (!cap_down(CAP_SYS_ADMIN))
+ _exit(EXIT_FAILURE);
+
+ if (sys_fsconfig(fd_context, FSCONFIG_SET_FLAG, "override_creds", NULL, 0))
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+ ASSERT_EQ(sys_waitid(P_PID, pid, NULL, WEXITED), 0);
+ ASSERT_GE(close(pidfd), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(mknodat(fd_overlay, "dev-zero", S_IFCHR | 0644, makedev(1, 5)), -1);
+ ASSERT_EQ(errno, EPERM);
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_500_layers_via_opath_fds)
+{
+ int fd_context, fd_tmpfs, fd_overlay, fd_work, fd_upper, fd_lower;
+ int layer_fds[500] = { [0 ... 499] = -EBADF };
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+ char path[100];
+
+ sprintf(path, "l%d", i);
+ ASSERT_EQ(mkdirat(fd_tmpfs, path, 0755), 0);
+ layer_fds[i] = openat(fd_tmpfs, path, O_DIRECTORY | O_PATH);
+ ASSERT_GE(layer_fds[i], 0);
+ }
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0);
+ fd_work = openat(fd_tmpfs, "w", O_DIRECTORY | O_PATH);
+ ASSERT_GE(fd_work, 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ fd_upper = openat(fd_tmpfs, "u", O_DIRECTORY | O_PATH);
+ ASSERT_GE(fd_upper, 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l501", 0755), 0);
+ fd_lower = openat(fd_tmpfs, "l501", O_DIRECTORY | O_PATH);
+ ASSERT_GE(fd_lower, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, fd_work), 0);
+ ASSERT_EQ(close(fd_work), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, fd_upper), 0);
+ ASSERT_EQ(close(fd_upper), 0);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[i]), 0);
+ ASSERT_EQ(close(layer_fds[i]), 0);
+ }
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, fd_lower), 0);
+ ASSERT_EQ(close(fd_lower), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+}
+
+TEST_F(set_layers_via_fds, set_layers_via_detached_mount_fds)
+{
+ int fd_context, fd_tmpfs, fd_overlay, fd_tmp;
+ int layer_fds[] = { [0 ... 8] = -EBADF };
+ bool layers_found[] = { [0 ... 8] = false };
+ size_t len = 0;
+ char *line = NULL;
+ FILE *f_mountinfo;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0);
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+ ASSERT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u/upper", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "u/work", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l3", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "l4", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "d1", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "d2", 0755), 0);
+ ASSERT_EQ(mkdirat(fd_tmpfs, "d3", 0755), 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/set_layers_via_fds_tmpfs", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ fd_tmp = open_tree(fd_tmpfs, "u", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(fd_tmp, 0);
+
+ layer_fds[0] = openat(fd_tmp, "upper", O_CLOEXEC | O_DIRECTORY | O_PATH);
+ ASSERT_GE(layer_fds[0], 0);
+
+ layer_fds[1] = openat(fd_tmp, "work", O_CLOEXEC | O_DIRECTORY | O_PATH);
+ ASSERT_GE(layer_fds[1], 0);
+
+ layer_fds[2] = open_tree(fd_tmpfs, "l1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[2], 0);
+
+ layer_fds[3] = open_tree(fd_tmpfs, "l2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[3], 0);
+
+ layer_fds[4] = open_tree(fd_tmpfs, "l3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[4], 0);
+
+ layer_fds[5] = open_tree(fd_tmpfs, "l4", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[5], 0);
+
+ layer_fds[6] = open_tree(fd_tmpfs, "d1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[6], 0);
+
+ layer_fds[7] = open_tree(fd_tmpfs, "d2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[7], 0);
+
+ layer_fds[8] = open_tree(fd_tmpfs, "d3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(layer_fds[8], 0);
+
+ ASSERT_EQ(close(fd_tmpfs), 0);
+
+ fd_context = sys_fsopen("overlay", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[0]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[1]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[4]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[5]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[6]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[7]), 0);
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[8]), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_overlay = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_overlay, 0);
+
+ ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ f_mountinfo = fopen("/proc/self/mountinfo", "r");
+ ASSERT_NE(f_mountinfo, NULL);
+
+ while (getline(&line, &len, f_mountinfo) != -1) {
+ char *haystack = line;
+
+ if (strstr(haystack, "workdir=/tmp/w"))
+ layers_found[0] = true;
+ if (strstr(haystack, "upperdir=/tmp/u"))
+ layers_found[1] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l1"))
+ layers_found[2] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l2"))
+ layers_found[3] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l3"))
+ layers_found[4] = true;
+ if (strstr(haystack, "lowerdir+=/tmp/l4"))
+ layers_found[5] = true;
+ if (strstr(haystack, "datadir+=/tmp/d1"))
+ layers_found[6] = true;
+ if (strstr(haystack, "datadir+=/tmp/d2"))
+ layers_found[7] = true;
+ if (strstr(haystack, "datadir+=/tmp/d3"))
+ layers_found[8] = true;
+ }
+ free(line);
+
+ for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) {
+ ASSERT_EQ(layers_found[i], true);
+ ASSERT_EQ(close(layer_fds[i]), 0);
+ }
+
+ ASSERT_EQ(close(fd_context), 0);
+ ASSERT_EQ(close(fd_overlay), 0);
+ ASSERT_EQ(fclose(f_mountinfo), 0);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/overlayfs/wrappers.h b/tools/testing/selftests/filesystems/overlayfs/wrappers.h
index 071b95fd2ac0..c38bc48e0cfa 100644
--- a/tools/testing/selftests/filesystems/overlayfs/wrappers.h
+++ b/tools/testing/selftests/filesystems/overlayfs/wrappers.h
@@ -44,4 +44,21 @@ static inline int sys_move_mount(int from_dfd, const char *from_pathname,
to_pathname, flags);
}
+#ifndef OPEN_TREE_CLONE
+#define OPEN_TREE_CLONE 1
+#endif
+
+#ifndef OPEN_TREE_CLOEXEC
+#define OPEN_TREE_CLOEXEC O_CLOEXEC
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000
+#endif
+
+static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
+{
+ return syscall(__NR_open_tree, dfd, filename, flags);
+}
+
#endif
diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h
index f4294bab9d73..a7a5289ddae9 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount.h
+++ b/tools/testing/selftests/filesystems/statmount/statmount.h
@@ -25,7 +25,7 @@ static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
return syscall(__NR_statmount, &req, buf, bufsize, flags);
}
-static ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
+static inline ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
uint64_t last_mnt_id, uint64_t list[], size_t num,
unsigned int flags)
{
diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c
new file mode 100644
index 000000000000..e553c89c5b19
--- /dev/null
+++ b/tools/testing/selftests/filesystems/utils.c
@@ -0,0 +1,501 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <fcntl.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <grp.h>
+#include <linux/limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/eventfd.h>
+#include <sys/fsuid.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/xattr.h>
+
+#include "utils.h"
+
+#define MAX_USERNS_LEVEL 32
+
+#define syserror(format, ...) \
+ ({ \
+ fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__); \
+ (-errno); \
+ })
+
+#define syserror_set(__ret__, format, ...) \
+ ({ \
+ typeof(__ret__) __internal_ret__ = (__ret__); \
+ errno = labs(__ret__); \
+ fprintf(stderr, "%m - " format "\n", ##__VA_ARGS__); \
+ __internal_ret__; \
+ })
+
+#define STRLITERALLEN(x) (sizeof(""x"") - 1)
+
+#define INTTYPE_TO_STRLEN(type) \
+ (2 + (sizeof(type) <= 1 \
+ ? 3 \
+ : sizeof(type) <= 2 \
+ ? 5 \
+ : sizeof(type) <= 4 \
+ ? 10 \
+ : sizeof(type) <= 8 ? 20 : sizeof(int[-2 * (sizeof(type) > 8)])))
+
+#define list_for_each(__iterator, __list) \
+ for (__iterator = (__list)->next; __iterator != __list; __iterator = __iterator->next)
+
+typedef enum idmap_type_t {
+ ID_TYPE_UID,
+ ID_TYPE_GID
+} idmap_type_t;
+
+struct id_map {
+ idmap_type_t map_type;
+ __u32 nsid;
+ __u32 hostid;
+ __u32 range;
+};
+
+struct list {
+ void *elem;
+ struct list *next;
+ struct list *prev;
+};
+
+struct userns_hierarchy {
+ int fd_userns;
+ int fd_event;
+ unsigned int level;
+ struct list id_map;
+};
+
+static inline void list_init(struct list *list)
+{
+ list->elem = NULL;
+ list->next = list->prev = list;
+}
+
+static inline int list_empty(const struct list *list)
+{
+ return list == list->next;
+}
+
+static inline void __list_add(struct list *new, struct list *prev, struct list *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+static inline void list_add_tail(struct list *head, struct list *list)
+{
+ __list_add(list, head->prev, head);
+}
+
+static inline void list_del(struct list *list)
+{
+ struct list *next, *prev;
+
+ next = list->next;
+ prev = list->prev;
+ next->prev = prev;
+ prev->next = next;
+}
+
+static ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = read(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+ void *stack;
+
+ stack = malloc(__STACK_SIZE);
+ if (!stack)
+ return -ENOMEM;
+
+#ifdef __ia64__
+ return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+ return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
+
+static int get_userns_fd_cb(void *data)
+{
+ for (;;)
+ pause();
+ _exit(0);
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf, size_t buf_size)
+{
+ int fd = -EBADF, setgroups_fd = -EBADF;
+ int fret = -1;
+ int ret;
+ char path[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
+ STRLITERALLEN("/setgroups") + 1];
+
+ if (geteuid() != 0 && map_type == ID_TYPE_GID) {
+ ret = snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
+ if (ret < 0 || ret >= sizeof(path))
+ goto out;
+
+ setgroups_fd = open(path, O_WRONLY | O_CLOEXEC);
+ if (setgroups_fd < 0 && errno != ENOENT) {
+ syserror("Failed to open \"%s\"", path);
+ goto out;
+ }
+
+ if (setgroups_fd >= 0) {
+ ret = write_nointr(setgroups_fd, "deny\n", STRLITERALLEN("deny\n"));
+ if (ret != STRLITERALLEN("deny\n")) {
+ syserror("Failed to write \"deny\" to \"/proc/%d/setgroups\"", pid);
+ goto out;
+ }
+ }
+ }
+
+ ret = snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid, map_type == ID_TYPE_UID ? 'u' : 'g');
+ if (ret < 0 || ret >= sizeof(path))
+ goto out;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC);
+ if (fd < 0) {
+ syserror("Failed to open \"%s\"", path);
+ goto out;
+ }
+
+ ret = write_nointr(fd, buf, buf_size);
+ if (ret != buf_size) {
+ syserror("Failed to write %cid mapping to \"%s\"",
+ map_type == ID_TYPE_UID ? 'u' : 'g', path);
+ goto out;
+ }
+
+ fret = 0;
+out:
+ close(fd);
+ close(setgroups_fd);
+
+ return fret;
+}
+
+static int map_ids_from_idmap(struct list *idmap, pid_t pid)
+{
+ int fill, left;
+ char mapbuf[4096] = {};
+ bool had_entry = false;
+ idmap_type_t map_type, u_or_g;
+
+ if (list_empty(idmap))
+ return 0;
+
+ for (map_type = ID_TYPE_UID, u_or_g = 'u';
+ map_type <= ID_TYPE_GID; map_type++, u_or_g = 'g') {
+ char *pos = mapbuf;
+ int ret;
+ struct list *iterator;
+
+
+ list_for_each(iterator, idmap) {
+ struct id_map *map = iterator->elem;
+ if (map->map_type != map_type)
+ continue;
+
+ had_entry = true;
+
+ left = 4096 - (pos - mapbuf);
+ fill = snprintf(pos, left, "%u %u %u\n", map->nsid, map->hostid, map->range);
+ /*
+ * The kernel only takes <= 4k for writes to
+ * /proc/<pid>/{g,u}id_map
+ */
+ if (fill <= 0 || fill >= left)
+ return syserror_set(-E2BIG, "Too many %cid mappings defined", u_or_g);
+
+ pos += fill;
+ }
+ if (!had_entry)
+ continue;
+
+ ret = write_id_mapping(map_type, pid, mapbuf, pos - mapbuf);
+ if (ret < 0)
+ return syserror("Failed to write mapping: %s", mapbuf);
+
+ memset(mapbuf, 0, sizeof(mapbuf));
+ }
+
+ return 0;
+}
+
+static int get_userns_fd_from_idmap(struct list *idmap)
+{
+ int ret;
+ pid_t pid;
+ char path_ns[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
+ STRLITERALLEN("/ns/user") + 1];
+
+ pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER | CLONE_NEWNS);
+ if (pid < 0)
+ return -errno;
+
+ ret = map_ids_from_idmap(idmap, pid);
+ if (ret < 0)
+ return ret;
+
+ ret = snprintf(path_ns, sizeof(path_ns), "/proc/%d/ns/user", pid);
+ if (ret < 0 || (size_t)ret >= sizeof(path_ns))
+ ret = -EIO;
+ else
+ ret = open(path_ns, O_RDONLY | O_CLOEXEC | O_NOCTTY);
+
+ (void)kill(pid, SIGKILL);
+ (void)wait_for_pid(pid);
+ return ret;
+}
+
+int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
+{
+ struct list head, uid_mapl, gid_mapl;
+ struct id_map uid_map = {
+ .map_type = ID_TYPE_UID,
+ .nsid = nsid,
+ .hostid = hostid,
+ .range = range,
+ };
+ struct id_map gid_map = {
+ .map_type = ID_TYPE_GID,
+ .nsid = nsid,
+ .hostid = hostid,
+ .range = range,
+ };
+
+ list_init(&head);
+ uid_mapl.elem = &uid_map;
+ gid_mapl.elem = &gid_map;
+ list_add_tail(&head, &uid_mapl);
+ list_add_tail(&head, &gid_mapl);
+
+ return get_userns_fd_from_idmap(&head);
+}
+
+bool switch_ids(uid_t uid, gid_t gid)
+{
+ if (setgroups(0, NULL))
+ return syserror("failure: setgroups");
+
+ if (setresgid(gid, gid, gid))
+ return syserror("failure: setresgid");
+
+ if (setresuid(uid, uid, uid))
+ return syserror("failure: setresuid");
+
+ /* Ensure we can access proc files from processes we can ptrace. */
+ if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0))
+ return syserror("failure: make dumpable");
+
+ return true;
+}
+
+static int create_userns_hierarchy(struct userns_hierarchy *h);
+
+static int userns_fd_cb(void *data)
+{
+ struct userns_hierarchy *h = data;
+ char c;
+ int ret;
+
+ ret = read_nointr(h->fd_event, &c, 1);
+ if (ret < 0)
+ return syserror("failure: read from socketpair");
+
+ /* Only switch ids if someone actually wrote a mapping for us. */
+ if (c == '1') {
+ if (!switch_ids(0, 0))
+ return syserror("failure: switch ids to 0");
+ }
+
+ ret = write_nointr(h->fd_event, "1", 1);
+ if (ret < 0)
+ return syserror("failure: write to socketpair");
+
+ ret = create_userns_hierarchy(++h);
+ if (ret < 0)
+ return syserror("failure: userns level %d", h->level);
+
+ return 0;
+}
+
+static int create_userns_hierarchy(struct userns_hierarchy *h)
+{
+ int fret = -1;
+ char c;
+ int fd_socket[2];
+ int fd_userns = -EBADF, ret = -1;
+ ssize_t bytes;
+ pid_t pid;
+ char path[256];
+
+ if (h->level == MAX_USERNS_LEVEL)
+ return 0;
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, fd_socket);
+ if (ret < 0)
+ return syserror("failure: create socketpair");
+
+ /* Note the CLONE_FILES | CLONE_VM when mucking with fds and memory. */
+ h->fd_event = fd_socket[1];
+ pid = do_clone(userns_fd_cb, h, CLONE_NEWUSER | CLONE_FILES | CLONE_VM);
+ if (pid < 0) {
+ syserror("failure: userns level %d", h->level);
+ goto out_close;
+ }
+
+ ret = map_ids_from_idmap(&h->id_map, pid);
+ if (ret < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: writing id mapping for userns level %d for %d", h->level, pid);
+ goto out_wait;
+ }
+
+ if (!list_empty(&h->id_map))
+ bytes = write_nointr(fd_socket[0], "1", 1); /* Inform the child we wrote a mapping. */
+ else
+ bytes = write_nointr(fd_socket[0], "0", 1); /* Inform the child we didn't write a mapping. */
+ if (bytes < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: write to socketpair");
+ goto out_wait;
+ }
+
+ /* Wait for child to set*id() and become dumpable. */
+ bytes = read_nointr(fd_socket[0], &c, 1);
+ if (bytes < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: read from socketpair");
+ goto out_wait;
+ }
+
+ snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+ fd_userns = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd_userns < 0) {
+ kill(pid, SIGKILL);
+ syserror("failure: open userns level %d for %d", h->level, pid);
+ goto out_wait;
+ }
+
+ fret = 0;
+
+out_wait:
+ if (!wait_for_pid(pid) && !fret) {
+ h->fd_userns = fd_userns;
+ fd_userns = -EBADF;
+ }
+
+out_close:
+ if (fd_userns >= 0)
+ close(fd_userns);
+ close(fd_socket[0]);
+ close(fd_socket[1]);
+ return fret;
+}
+
+/* caps_down - lower all effective caps */
+int caps_down(void)
+{
+ bool fret = false;
+ cap_t caps = NULL;
+ int ret = -1;
+
+ caps = cap_get_proc();
+ if (!caps)
+ goto out;
+
+ ret = cap_clear_flag(caps, CAP_EFFECTIVE);
+ if (ret)
+ goto out;
+
+ ret = cap_set_proc(caps);
+ if (ret)
+ goto out;
+
+ fret = true;
+
+out:
+ cap_free(caps);
+ return fret;
+}
+
+/* cap_down - lower an effective cap */
+int cap_down(cap_value_t down)
+{
+ bool fret = false;
+ cap_t caps = NULL;
+ cap_value_t cap = down;
+ int ret = -1;
+
+ caps = cap_get_proc();
+ if (!caps)
+ goto out;
+
+ ret = cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, 0);
+ if (ret)
+ goto out;
+
+ ret = cap_set_proc(caps);
+ if (ret)
+ goto out;
+
+ fret = true;
+
+out:
+ cap_free(caps);
+ return fret;
+}
diff --git a/tools/testing/selftests/filesystems/utils.h b/tools/testing/selftests/filesystems/utils.h
new file mode 100644
index 000000000000..7f1df2a3e94c
--- /dev/null
+++ b/tools/testing/selftests/filesystems/utils.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __IDMAP_UTILS_H
+#define __IDMAP_UTILS_H
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/capability.h>
+#include <sys/fsuid.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+extern int get_userns_fd(unsigned long nsid, unsigned long hostid,
+ unsigned long range);
+
+extern int caps_down(void);
+extern int cap_down(cap_value_t down);
+
+extern bool switch_ids(uid_t uid, gid_t gid);
+
+static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
+{
+ if (setns(fd, CLONE_NEWUSER))
+ return false;
+
+ if (!switch_ids(uid, gid))
+ return false;
+
+ if (drop_caps && !caps_down())
+ return false;
+
+ return true;
+}
+
+#endif /* __IDMAP_UTILS_H */
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
index dc25bcf4f9e2..73f6c6fcecab 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
@@ -7,12 +7,42 @@ echo 0 > events/enable
echo > dynamic_events
PLACE=$FUNCTION_FORK
+PLACE2="kmem_cache_free"
+PLACE3="schedule_timeout"
+
+# Some functions may have BPF programs attached, therefore
+# count already enabled_functions before tests start
+ocnt=`cat enabled_functions | wc -l`
echo "f:myevent1 $PLACE" >> dynamic_events
+
+# Make sure the event is attached and is the only one
+grep -q $PLACE enabled_functions
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 1)) ]; then
+ exit_fail
+fi
+
echo "f:myevent2 $PLACE%return" >> dynamic_events
+# It should till be the only attached function
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 1)) ]; then
+ exit_fail
+fi
+
+# add another event
+echo "f:myevent3 $PLACE2" >> dynamic_events
+
+grep -q $PLACE2 enabled_functions
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 2)) ]; then
+ exit_fail
+fi
+
grep -q myevent1 dynamic_events
grep -q myevent2 dynamic_events
+grep -q myevent3 dynamic_events
test -d events/fprobes/myevent1
test -d events/fprobes/myevent2
@@ -21,6 +51,34 @@ echo "-:myevent2" >> dynamic_events
grep -q myevent1 dynamic_events
! grep -q myevent2 dynamic_events
+# should still have 2 left
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 2)) ]; then
+ exit_fail
+fi
+
echo > dynamic_events
+# Should have none left
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $ocnt ]; then
+ exit_fail
+fi
+
+echo "f:myevent4 $PLACE" >> dynamic_events
+
+# Should only have one enabled
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $((ocnt + 1)) ]; then
+ exit_fail
+fi
+
+echo > dynamic_events
+
+# Should have none left
+cnt=`cat enabled_functions | wc -l`
+if [ $cnt -ne $ocnt ]; then
+ exit_fail
+fi
+
clear_trace
diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile
index 0336353bd15f..2839d2612ce3 100644
--- a/tools/testing/selftests/hid/Makefile
+++ b/tools/testing/selftests/hid/Makefile
@@ -43,10 +43,8 @@ TEST_GEN_PROGS = hid_bpf hidraw
# $3 - target (assumed to be file); only file name will be emitted;
# $4 - optional extra arg, emitted as-is, if provided.
ifeq ($(V),1)
-Q =
msg =
else
-Q = @
msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))";
MAKEFLAGS += --no-print-directory
submake_extras := feature_display=0
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index cdf91b0ca40f..c3b6d2604b1e 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -444,10 +444,6 @@ static inline __noreturn __printf(1, 2) void ksft_exit_skip(const char *msg, ...
static inline int ksft_min_kernel_version(unsigned int min_major,
unsigned int min_minor)
{
-#ifdef NOLIBC
- ksft_print_msg("NOLIBC: Can't check kernel version: Function not implemented\n");
- return 0;
-#else
unsigned int major, minor;
struct utsname info;
@@ -455,7 +451,6 @@ static inline int ksft_min_kernel_version(unsigned int min_major,
ksft_exit_fail_msg("Can't parse kernel version\n");
return major > min_major || (major == min_major && minor >= min_minor);
-#endif
}
#endif /* __KSELFTEST_H */
diff --git a/tools/testing/selftests/kselftest/module.sh b/tools/testing/selftests/kselftest/module.sh
index fb4733faff12..51fb65159932 100755
--- a/tools/testing/selftests/kselftest/module.sh
+++ b/tools/testing/selftests/kselftest/module.sh
@@ -11,7 +11,7 @@
# SPDX-License-Identifier: GPL-2.0+
# $(dirname $0)/../kselftest/module.sh "description" module_name
#
-# Example: tools/testing/selftests/lib/printf.sh
+# Example: tools/testing/selftests/lib/bitmap.sh
desc="" # Output prefix.
module="" # Filename (without the .ko).
diff --git a/tools/testing/selftests/kvm/mmu_stress_test.c b/tools/testing/selftests/kvm/mmu_stress_test.c
index d9c76b4c0d88..6a437d2be9fa 100644
--- a/tools/testing/selftests/kvm/mmu_stress_test.c
+++ b/tools/testing/selftests/kvm/mmu_stress_test.c
@@ -18,6 +18,7 @@
#include "ucall_common.h"
static bool mprotect_ro_done;
+static bool all_vcpus_hit_ro_fault;
static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
{
@@ -36,9 +37,9 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
/*
* Write to the region while mprotect(PROT_READ) is underway. Keep
- * looping until the memory is guaranteed to be read-only, otherwise
- * vCPUs may complete their writes and advance to the next stage
- * prematurely.
+ * looping until the memory is guaranteed to be read-only and a fault
+ * has occurred, otherwise vCPUs may complete their writes and advance
+ * to the next stage prematurely.
*
* For architectures that support skipping the faulting instruction,
* generate the store via inline assembly to ensure the exact length
@@ -56,7 +57,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
#else
vcpu_arch_put_guest(*((volatile uint64_t *)gpa), gpa);
#endif
- } while (!READ_ONCE(mprotect_ro_done));
+ } while (!READ_ONCE(mprotect_ro_done) || !READ_ONCE(all_vcpus_hit_ro_fault));
/*
* Only architectures that write the entire range can explicitly sync,
@@ -81,6 +82,7 @@ struct vcpu_info {
static int nr_vcpus;
static atomic_t rendezvous;
+static atomic_t nr_ro_faults;
static void rendezvous_with_boss(void)
{
@@ -148,12 +150,16 @@ static void *vcpu_worker(void *data)
* be stuck on the faulting instruction for other architectures. Go to
* stage 3 without a rendezvous
*/
- do {
- r = _vcpu_run(vcpu);
- } while (!r);
+ r = _vcpu_run(vcpu);
TEST_ASSERT(r == -1 && errno == EFAULT,
"Expected EFAULT on write to RO memory, got r = %d, errno = %d", r, errno);
+ atomic_inc(&nr_ro_faults);
+ if (atomic_read(&nr_ro_faults) == nr_vcpus) {
+ WRITE_ONCE(all_vcpus_hit_ro_fault, true);
+ sync_global_to_guest(vm, all_vcpus_hit_ro_fault);
+ }
+
#if defined(__x86_64__) || defined(__aarch64__)
/*
* Verify *all* writes from the guest hit EFAULT due to the VMA now
@@ -378,7 +384,6 @@ int main(int argc, char *argv[])
rendezvous_with_vcpus(&time_run2, "run 2");
mprotect(mem, slot_size, PROT_READ);
- usleep(10);
mprotect_ro_done = true;
sync_global_to_guest(vm, mprotect_ro_done);
diff --git a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
index 4f5881d4ef66..4e920705681a 100644
--- a/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86/hyperv_cpuid.c
@@ -41,13 +41,19 @@ static bool smt_possible(void)
return res;
}
-static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
- bool evmcs_expected)
+static void test_hv_cpuid(struct kvm_vcpu *vcpu, bool evmcs_expected)
{
+ const bool has_irqchip = !vcpu || vcpu->vm->has_irqchip;
+ const struct kvm_cpuid2 *hv_cpuid_entries;
int i;
int nent_expected = 10;
u32 test_val;
+ if (vcpu)
+ hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+ else
+ hv_cpuid_entries = kvm_get_supported_hv_cpuid();
+
TEST_ASSERT(hv_cpuid_entries->nent == nent_expected,
"KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
" (returned %d)",
@@ -80,12 +86,19 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
entry->eax, evmcs_expected
);
break;
+ case 0x40000003:
+ TEST_ASSERT(has_irqchip || !(entry->edx & BIT(19)),
+ "\"Direct\" Synthetic Timers should require in-kernel APIC");
+ break;
case 0x40000004:
test_val = entry->eax & (1UL << 18);
TEST_ASSERT(!!test_val == !smt_possible(),
"NoNonArchitecturalCoreSharing bit"
" doesn't reflect SMT setting");
+
+ TEST_ASSERT(has_irqchip || !(entry->eax & BIT(10)),
+ "Cluster IPI (i.e. SEND_IPI) should require in-kernel APIC");
break;
case 0x4000000A:
TEST_ASSERT(entry->eax & (1UL << 19),
@@ -109,9 +122,16 @@ static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
* entry->edx);
*/
}
+
+ /*
+ * Note, the CPUID array returned by the system-scoped helper is a one-
+ * time allocation, i.e. must not be freed.
+ */
+ if (vcpu)
+ free((void *)hv_cpuid_entries);
}
-void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+static void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
static struct kvm_cpuid2 cpuid = {.nent = 0};
int ret;
@@ -129,19 +149,20 @@ void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
- const struct kvm_cpuid2 *hv_cpuid_entries;
struct kvm_vcpu *vcpu;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
- vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ /* Test the vCPU ioctl without an in-kernel local APIC. */
+ vm = vm_create_barebones();
+ vcpu = __vm_vcpu_add(vm, 0);
+ test_hv_cpuid(vcpu, false);
+ kvm_vm_free(vm);
/* Test vCPU ioctl version */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
test_hv_cpuid_e2big(vm, vcpu);
-
- hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
- test_hv_cpuid(hv_cpuid_entries, false);
- free((void *)hv_cpuid_entries);
+ test_hv_cpuid(vcpu, false);
if (!kvm_cpu_has(X86_FEATURE_VMX) ||
!kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
@@ -149,9 +170,7 @@ int main(int argc, char *argv[])
goto do_sys;
}
vcpu_enable_evmcs(vcpu);
- hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
- test_hv_cpuid(hv_cpuid_entries, true);
- free((void *)hv_cpuid_entries);
+ test_hv_cpuid(vcpu, true);
do_sys:
/* Test system ioctl version */
@@ -161,9 +180,7 @@ do_sys:
}
test_hv_cpuid_e2big(vm, NULL);
-
- hv_cpuid_entries = kvm_get_supported_hv_cpuid();
- test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX));
+ test_hv_cpuid(NULL, kvm_cpu_has(X86_FEATURE_VMX));
out:
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
index 3eb0313ffa39..3641a42934ac 100644
--- a/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
+++ b/tools/testing/selftests/kvm/x86/nested_exceptions_test.c
@@ -85,6 +85,7 @@ static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector));
GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code);
+ GUEST_ASSERT(!ctrl->int_state);
}
static void l1_svm_code(struct svm_test_data *svm)
@@ -122,6 +123,7 @@ static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector);
GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code);
+ GUEST_ASSERT(!vmreadz(GUEST_INTERRUPTIBILITY_INFO));
}
static void l1_vmx_code(struct vmx_pages *vmx)
diff --git a/tools/testing/selftests/kvm/x86/sev_smoke_test.c b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
index a1a688e75266..d97816dc476a 100644
--- a/tools/testing/selftests/kvm/x86/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86/sev_smoke_test.c
@@ -52,7 +52,8 @@ static void compare_xsave(u8 *from_host, u8 *from_guest)
bool bad = false;
for (i = 0; i < 4095; i++) {
if (from_host[i] != from_guest[i]) {
- printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]);
+ printf("mismatch at %u | %02hhx %02hhx\n",
+ i, from_host[i], from_guest[i]);
bad = true;
}
}
diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore
index 470203a7cd73..335b2b1a3463 100644
--- a/tools/testing/selftests/landlock/.gitignore
+++ b/tools/testing/selftests/landlock/.gitignore
@@ -1,2 +1,4 @@
/*_test
+/sandbox-and-launch
/true
+/wait-pipe
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index a604ea5d8297..6064c9ac0532 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -207,6 +207,7 @@ enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd)
struct protocol_variant {
int domain;
int type;
+ int protocol;
};
struct service_fixture {
diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config
index 29af19c4e9f9..425de4c20271 100644
--- a/tools/testing/selftests/landlock/config
+++ b/tools/testing/selftests/landlock/config
@@ -1,8 +1,11 @@
+CONFIG_AF_UNIX_OOB=y
CONFIG_CGROUPS=y
CONFIG_CGROUP_SCHED=y
CONFIG_INET=y
CONFIG_IPV6=y
CONFIG_KEYS=y
+CONFIG_MPTCP=y
+CONFIG_MPTCP_IPV6=y
CONFIG_NET=y
CONFIG_NET_NS=y
CONFIG_OVERLAY_FS=y
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
index 4e0aeb53b225..d9de0ee49ebc 100644
--- a/tools/testing/selftests/landlock/net_test.c
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -85,18 +85,18 @@ static void setup_loopback(struct __test_metadata *const _metadata)
clear_ambient_cap(_metadata, CAP_NET_ADMIN);
}
+static bool prot_is_tcp(const struct protocol_variant *const prot)
+{
+ return (prot->domain == AF_INET || prot->domain == AF_INET6) &&
+ prot->type == SOCK_STREAM &&
+ (prot->protocol == IPPROTO_TCP || prot->protocol == IPPROTO_IP);
+}
+
static bool is_restricted(const struct protocol_variant *const prot,
const enum sandbox_type sandbox)
{
- switch (prot->domain) {
- case AF_INET:
- case AF_INET6:
- switch (prot->type) {
- case SOCK_STREAM:
- return sandbox == TCP_SANDBOX;
- }
- break;
- }
+ if (sandbox == TCP_SANDBOX)
+ return prot_is_tcp(prot);
return false;
}
@@ -105,7 +105,7 @@ static int socket_variant(const struct service_fixture *const srv)
int ret;
ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC,
- 0);
+ srv->protocol.protocol);
if (ret < 0)
return -errno;
return ret;
@@ -290,22 +290,70 @@ FIXTURE_TEARDOWN(protocol)
}
/* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp) {
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp1) {
/* clang-format on */
.sandbox = NO_SANDBOX,
.prot = {
.domain = AF_INET,
.type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
},
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp) {
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp2) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_mptcp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp1) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp2) {
/* clang-format on */
.sandbox = NO_SANDBOX,
.prot = {
.domain = AF_INET6,
.type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_mptcp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
},
};
@@ -350,22 +398,70 @@ FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_datagram) {
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp) {
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp1) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp2) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_mptcp) {
/* clang-format on */
.sandbox = TCP_SANDBOX,
.prot = {
.domain = AF_INET,
.type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp1) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ /* IPPROTO_IP == 0 */
+ .protocol = IPPROTO_IP,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp2) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .protocol = IPPROTO_TCP,
},
};
/* clang-format off */
-FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp) {
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_mptcp) {
/* clang-format on */
.sandbox = TCP_SANDBOX,
.prot = {
.domain = AF_INET6,
.type = SOCK_STREAM,
+ .protocol = IPPROTO_MPTCP,
},
};
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index c52fe3ad8e98..f876bf4744e1 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -4,5 +4,5 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
-TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh
+TEST_PROGS := bitmap.sh
include ../lib.mk
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index dc15aba8d0a3..81a1f64a22e8 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -1,5 +1,2 @@
-CONFIG_TEST_PRINTF=m
-CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
-CONFIG_PRIME_NUMBERS=m
CONFIG_TEST_BITOPS=m
diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
deleted file mode 100755
index 370b79a9cb2e..000000000000
--- a/tools/testing/selftests/lib/prime_numbers.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Checks fast/slow prime_number generation for inconsistencies
-$(dirname $0)/../kselftest/module.sh "prime numbers" prime_numbers selftest=65536
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
deleted file mode 100755
index 05f4544e87f9..000000000000
--- a/tools/testing/selftests/lib/printf.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Tests the printf infrastructure using test_printf kernel module.
-$(dirname $0)/../kselftest/module.sh "printf" test_printf
diff --git a/tools/testing/selftests/lib/scanf.sh b/tools/testing/selftests/lib/scanf.sh
deleted file mode 100755
index b59b8ba561c3..000000000000
--- a/tools/testing/selftests/lib/scanf.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Tests the scanf infrastructure using test_scanf kernel module.
-$(dirname $0)/../kselftest/module.sh "scanf" test_scanf
diff --git a/tools/testing/selftests/mm/guard-pages.c b/tools/testing/selftests/mm/guard-pages.c
index ece37212a8a2..525c50d3ec23 100644
--- a/tools/testing/selftests/mm/guard-pages.c
+++ b/tools/testing/selftests/mm/guard-pages.c
@@ -19,6 +19,8 @@
#include <sys/uio.h>
#include <unistd.h>
+#include "../pidfd/pidfd.h"
+
/*
* Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1:
*
@@ -50,11 +52,6 @@ static void handle_fatal(int c)
siglongjmp(signal_jmp_buf, c);
}
-static int pidfd_open(pid_t pid, unsigned int flags)
-{
- return syscall(SYS_pidfd_open, pid, flags);
-}
-
static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec,
size_t n, int advice, unsigned int flags)
{
@@ -370,14 +367,10 @@ TEST_F(guard_pages, multi_vma)
TEST_F(guard_pages, process_madvise)
{
const unsigned long page_size = self->page_size;
- pid_t pid = getpid();
- int pidfd = pidfd_open(pid, 0);
char *ptr_region, *ptr1, *ptr2, *ptr3;
ssize_t count;
struct iovec vec[6];
- ASSERT_NE(pidfd, -1);
-
/* Reserve region to map over. */
ptr_region = mmap(NULL, 100 * page_size, PROT_NONE,
MAP_ANON | MAP_PRIVATE, -1, 0);
@@ -425,7 +418,7 @@ TEST_F(guard_pages, process_madvise)
ASSERT_EQ(munmap(&ptr_region[99 * page_size], page_size), 0);
/* Now guard in one step. */
- count = sys_process_madvise(pidfd, vec, 6, MADV_GUARD_INSTALL, 0);
+ count = sys_process_madvise(PIDFD_SELF, vec, 6, MADV_GUARD_INSTALL, 0);
/* OK we don't have permission to do this, skip. */
if (count == -1 && errno == EPERM)
@@ -446,7 +439,7 @@ TEST_F(guard_pages, process_madvise)
ASSERT_FALSE(try_read_write_buf(&ptr3[19 * page_size]));
/* Now do the same with unguard... */
- count = sys_process_madvise(pidfd, vec, 6, MADV_GUARD_REMOVE, 0);
+ count = sys_process_madvise(PIDFD_SELF, vec, 6, MADV_GUARD_REMOVE, 0);
/* ...and everything should now succeed. */
@@ -463,7 +456,6 @@ TEST_F(guard_pages, process_madvise)
ASSERT_EQ(munmap(ptr1, 10 * page_size), 0);
ASSERT_EQ(munmap(ptr2, 5 * page_size), 0);
ASSERT_EQ(munmap(ptr3, 20 * page_size), 0);
- close(pidfd);
}
/* Assert that unmapping ranges does not leave guard markers behind. */
diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c
index ada9156cc497..c463d1c09c9b 100644
--- a/tools/testing/selftests/mm/hugepage-mremap.c
+++ b/tools/testing/selftests/mm/hugepage-mremap.c
@@ -15,7 +15,7 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
#include <sys/mman.h>
#include <errno.h>
#include <fcntl.h> /* Definition of O_* constants */
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index 66b4e111b5a2..b61803e36d1c 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -11,7 +11,7 @@
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/mman.h>
@@ -369,6 +369,7 @@ unmap:
munmap(map, size);
}
+#ifdef __NR_userfaultfd
static void test_unmerge_uffd_wp(void)
{
struct uffdio_writeprotect uffd_writeprotect;
@@ -429,6 +430,7 @@ close_uffd:
unmap:
munmap(map, size);
}
+#endif
/* Verify that KSM can be enabled / queried with prctl. */
static void test_prctl(void)
@@ -684,7 +686,9 @@ int main(int argc, char **argv)
exit(test_child_ksm());
}
+#ifdef __NR_userfaultfd
tests++;
+#endif
ksft_print_header();
ksft_set_plan(tests);
@@ -696,7 +700,9 @@ int main(int argc, char **argv)
test_unmerge();
test_unmerge_zero_pages();
test_unmerge_discarded();
+#ifdef __NR_userfaultfd
test_unmerge_uffd_wp();
+#endif
test_prot_none();
diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c
index 74c911aa3aea..9a0597310a76 100644
--- a/tools/testing/selftests/mm/memfd_secret.c
+++ b/tools/testing/selftests/mm/memfd_secret.c
@@ -17,7 +17,7 @@
#include <stdlib.h>
#include <string.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>
@@ -28,6 +28,8 @@
#define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__)
#define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__)
+#ifdef __NR_memfd_secret
+
#define PATTERN 0x55
static const int prot = PROT_READ | PROT_WRITE;
@@ -332,3 +334,13 @@ int main(int argc, char *argv[])
ksft_finished();
}
+
+#else /* __NR_memfd_secret */
+
+int main(int argc, char *argv[])
+{
+ printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n");
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_memfd_secret */
diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c
index af2fce496912..09feeb453646 100644
--- a/tools/testing/selftests/mm/mkdirty.c
+++ b/tools/testing/selftests/mm/mkdirty.c
@@ -9,7 +9,7 @@
*/
#include <fcntl.h>
#include <signal.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
@@ -265,6 +265,7 @@ munmap:
munmap(mmap_mem, mmap_size);
}
+#ifdef __NR_userfaultfd
static void test_uffdio_copy(void)
{
struct uffdio_register uffdio_register;
@@ -322,6 +323,7 @@ munmap:
munmap(dst, pagesize);
free(src);
}
+#endif /* __NR_userfaultfd */
int main(void)
{
@@ -334,7 +336,9 @@ int main(void)
thpsize / 1024);
tests += 3;
}
+#ifdef __NR_userfaultfd
tests += 1;
+#endif /* __NR_userfaultfd */
ksft_print_header();
ksft_set_plan(tests);
@@ -364,7 +368,9 @@ int main(void)
if (thpsize)
test_pte_mapped_thp();
/* Placing a fresh page via userfaultfd may set the PTE dirty. */
+#ifdef __NR_userfaultfd
test_uffdio_copy();
+#endif /* __NR_userfaultfd */
err = ksft_get_fail_cnt();
if (err)
diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h
index 1e5731bab499..4417eaa5cfb7 100644
--- a/tools/testing/selftests/mm/mlock2.h
+++ b/tools/testing/selftests/mm/mlock2.h
@@ -3,7 +3,6 @@
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
-#include <asm-generic/unistd.h>
static int mlock2_(void *start, size_t len, int flags)
{
diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c
index a4683f2476f2..35565af308af 100644
--- a/tools/testing/selftests/mm/protection_keys.c
+++ b/tools/testing/selftests/mm/protection_keys.c
@@ -42,7 +42,7 @@
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include <asm-generic/unistd.h>
+#include <unistd.h>
#include <sys/ptrace.h>
#include <setjmp.h>
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 333c468c2699..7cc71d942f83 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -220,7 +220,7 @@ run_test() {
if test_selected ${CATEGORY}; then
# On memory constrainted systems some tests can fail to allocate hugepages.
# perform some cleanup before the test for a higher success rate.
- if [ ${CATEGORY} == "thp" ] | [ ${CATEGORY} == "hugetlb" ]; then
+ if [ ${CATEGORY} == "thp" -o ${CATEGORY} == "hugetlb" ]; then
echo 3 > /proc/sys/vm/drop_caches
sleep 2
echo 1 > /proc/sys/vm/compact_memory
@@ -304,7 +304,9 @@ uffd_stress_bin=./uffd-stress
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16
# Hugetlb tests require source and destination huge pages. Pass in half
# the size of the free pages we have, which is used for *each*.
-half_ufd_size_MB=$((freepgs / 2))
+# uffd-stress expects a region expressed in MiB, so we adjust
+# half_ufd_size_MB accordingly.
+half_ufd_size_MB=$(((freepgs * hpgsize_KB) / 1024 / 2))
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16
diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c
index 717539eddf98..7ad6ba660c7d 100644
--- a/tools/testing/selftests/mm/uffd-common.c
+++ b/tools/testing/selftests/mm/uffd-common.c
@@ -673,7 +673,11 @@ int uffd_open_dev(unsigned int flags)
int uffd_open_sys(unsigned int flags)
{
+#ifdef __NR_userfaultfd
return syscall(__NR_userfaultfd, flags);
+#else
+ return -1;
+#endif
}
int uffd_open(unsigned int flags)
diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c
index a4b83280998a..944d559ade21 100644
--- a/tools/testing/selftests/mm/uffd-stress.c
+++ b/tools/testing/selftests/mm/uffd-stress.c
@@ -33,10 +33,11 @@
* pthread_mutex_lock will also verify the atomicity of the memory
* transfer (UFFDIO_COPY).
*/
-#include <asm-generic/unistd.h>
+
#include "uffd-common.h"
uint64_t features;
+#ifdef __NR_userfaultfd
#define BOUNCE_RANDOM (1<<0)
#define BOUNCE_RACINGFAULTS (1<<1)
@@ -471,3 +472,15 @@ int main(int argc, char **argv)
nr_pages, nr_pages_per_cpu);
return userfaultfd_stress();
}
+
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+ printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index 9ff71fa1f9bf..74c8bc02b506 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -5,11 +5,12 @@
* Copyright (C) 2015-2023 Red Hat, Inc.
*/
-#include <asm-generic/unistd.h>
#include "uffd-common.h"
#include "../../../../mm/gup_test.h"
+#ifdef __NR_userfaultfd
+
/* The unit test doesn't need a large or random size, make it 32MB for now */
#define UFFD_TEST_MEM_SIZE (32UL << 20)
@@ -1558,3 +1559,14 @@ int main(int argc, char *argv[])
return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS;
}
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+ printf("Skipping %s (missing __NR_userfaultfd)\n", __file__);
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 70f65eb320a7..48a000cabc97 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -20,6 +20,7 @@
#include <stdarg.h>
#include <linux/mount.h>
+#include "../filesystems/overlayfs/wrappers.h"
#include "../kselftest_harness.h"
#ifndef CLONE_NEWNS
@@ -126,6 +127,26 @@
#endif
#endif
+#ifndef __NR_move_mount
+ #if defined __alpha__
+ #define __NR_move_mount 539
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_move_mount 4429
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_move_mount 6429
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_move_mount 5429
+ #endif
+ #elif defined __ia64__
+ #define __NR_move_mount (428 + 1024)
+ #else
+ #define __NR_move_mount 429
+ #endif
+#endif
+
#ifndef MOUNT_ATTR_IDMAP
#define MOUNT_ATTR_IDMAP 0x00100000
#endif
@@ -397,6 +418,10 @@ FIXTURE_SETUP(mount_setattr)
ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
+ ASSERT_EQ(mkdir("/tmp/target1", 0777), 0);
+
+ ASSERT_EQ(mkdir("/tmp/target2", 0777), 0);
+
ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
"size=100000,mode=700"), 0);
@@ -1506,4 +1531,631 @@ TEST_F(mount_setattr, mount_attr_nosymfollow)
ASSERT_EQ(close(fd), 0);
}
+TEST_F(mount_setattr, open_tree_detached)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ * |-/mnt/A testing tmpfs
+ * | `-/mnt/A/AA testing tmpfs
+ * | `-/mnt/A/AA/B testing tmpfs
+ * | `-/mnt/A/AA/B/BB testing tmpfs
+ * `-/mnt/B testing ramfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_subdir, 0);
+ /*
+ * /AA testing tmpfs
+ * `-/AA/B testing tmpfs
+ * `-/AA/B/BB testing tmpfs
+ */
+ ASSERT_EQ(statx(fd_tree_subdir, "B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_subdir, "B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ ASSERT_EQ(move_mount(fd_tree_subdir, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ /*
+ * /tmp/target1 testing tmpfs
+ * `-/tmp/target1/B testing tmpfs
+ * `-/tmp/target1/B/BB testing tmpfs
+ */
+ ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target1/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target1/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target2", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ /*
+ * /tmp/target2 testing tmpfs
+ * |-/tmp/target2/A testing tmpfs
+ * | `-/tmp/target2/A/AA testing tmpfs
+ * | `-/tmp/target2/A/AA/B testing tmpfs
+ * | `-/tmp/target2/A/AA/B/BB testing tmpfs
+ * `-/tmp/target2/B testing ramfs
+ */
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2/A", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2/A/AA/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target2/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ EXPECT_EQ(close(fd_tree_base), 0);
+ EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, open_tree_detached_fail)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ * |-/mnt/A testing tmpfs
+ * | `-/mnt/A/AA testing tmpfs
+ * | `-/mnt/A/AA/B testing tmpfs
+ * | `-/mnt/A/AA/B/BB testing tmpfs
+ * `-/mnt/B testing ramfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ /*
+ * The origin mount namespace of the anonymous mount namespace
+ * of @fd_tree_base doesn't match the caller's mount namespace
+ * anymore so creation of another detached mounts must fail.
+ */
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_LT(fd_tree_subdir, 0);
+ ASSERT_EQ(errno, EINVAL);
+}
+
+TEST_F(mount_setattr, open_tree_detached_fail2)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ * |-/mnt/A testing tmpfs
+ * | `-/mnt/A/AA testing tmpfs
+ * | `-/mnt/A/AA/B testing tmpfs
+ * | `-/mnt/A/AA/B/BB testing tmpfs
+ * `-/mnt/B testing ramfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ EXPECT_EQ(create_and_enter_userns(), 0);
+
+ /*
+ * The caller entered a new user namespace. They will have
+ * CAP_SYS_ADMIN in this user namespace. However, they're still
+ * located in a mount namespace that is owned by an ancestor
+ * user namespace in which they hold no privilege. Creating a
+ * detached mount must thus fail.
+ */
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_LT(fd_tree_subdir, 0);
+ ASSERT_EQ(errno, EPERM);
+}
+
+TEST_F(mount_setattr, open_tree_detached_fail3)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ * |-/mnt/A testing tmpfs
+ * | `-/mnt/A/AA testing tmpfs
+ * | `-/mnt/A/AA/B testing tmpfs
+ * | `-/mnt/A/AA/B/BB testing tmpfs
+ * `-/mnt/B testing ramfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_EQ(statx(fd_tree_base, "A/AA/B/BB", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ EXPECT_EQ(prepare_unpriv_mountns(), 0);
+
+ /*
+ * The caller entered a new mount namespace. They will have
+ * CAP_SYS_ADMIN in the owning user namespace of their mount
+ * namespace.
+ *
+ * However, the origin mount namespace of the anonymous mount
+ * namespace of @fd_tree_base doesn't match the caller's mount
+ * namespace anymore so creation of another detached mounts must
+ * fail.
+ */
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "A/AA",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_LT(fd_tree_subdir, 0);
+ ASSERT_EQ(errno, EINVAL);
+}
+
+TEST_F(mount_setattr, open_tree_subfolder)
+{
+ int fd_context, fd_tmpfs, fd_tree;
+
+ fd_context = sys_fsopen("tmpfs", 0);
+ ASSERT_GE(fd_context, 0);
+
+ ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0);
+
+ fd_tmpfs = sys_fsmount(fd_context, 0, 0);
+ ASSERT_GE(fd_tmpfs, 0);
+
+ EXPECT_EQ(close(fd_context), 0);
+
+ ASSERT_EQ(mkdirat(fd_tmpfs, "subdir", 0755), 0);
+
+ fd_tree = sys_open_tree(fd_tmpfs, "subdir",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree, 0);
+
+ EXPECT_EQ(close(fd_tmpfs), 0);
+
+ ASSERT_EQ(mkdirat(-EBADF, "/mnt/open_tree_subfolder", 0755), 0);
+
+ ASSERT_EQ(sys_move_mount(fd_tree, "", -EBADF, "/mnt/open_tree_subfolder", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ EXPECT_EQ(close(fd_tree), 0);
+
+ ASSERT_EQ(umount2("/mnt/open_tree_subfolder", 0), 0);
+
+ EXPECT_EQ(rmdir("/mnt/open_tree_subfolder"), 0);
+}
+
+TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_then_close)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_subdir, 0);
+ /*
+ * /mnt testing tmpfs
+ */
+ ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ /*
+ * /mnt testing tmpfs
+ * `-/mnt testing tmpfs
+ */
+ ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+ ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+
+ EXPECT_EQ(close(fd_tree_base), 0);
+ EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, mount_detached_mount_on_detached_mount_and_attach)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+ __u64 mnt_id = 0;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+ /*
+ * /mnt testing tmpfs
+ */
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ fd_tree_subdir = sys_open_tree(fd_tree_base, "",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_subdir, 0);
+ /*
+ * /mnt testing tmpfs
+ */
+ ASSERT_EQ(statx(fd_tree_subdir, "A", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ /*
+ * /mnt testing tmpfs
+ * `-/mnt testing tmpfs
+ */
+ ASSERT_EQ(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+ ASSERT_EQ(statx(fd_tree_subdir, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
+ mnt_id = stx.stx_mnt_id;
+
+ ASSERT_NE(move_mount(fd_tree_subdir, "", fd_tree_base, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+
+ ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, STATX_MNT_ID_UNIQUE, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+ ASSERT_TRUE(stx.stx_mask & STATX_MNT_ID_UNIQUE);
+ ASSERT_EQ(stx.stx_mnt_id, mnt_id);
+
+ EXPECT_EQ(close(fd_tree_base), 0);
+ EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, move_mount_detached_fail)
+{
+ int fd_tree_base = -EBADF, fd_tree_subdir = -EBADF;
+ struct statx stx;
+
+ fd_tree_base = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_base, 0);
+
+ /* Attach the mount to the caller's mount namespace. */
+ ASSERT_EQ(move_mount(fd_tree_base, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(statx(fd_tree_base, "A", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ fd_tree_subdir = sys_open_tree(-EBADF, "/tmp/B",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree_subdir, 0);
+ ASSERT_EQ(statx(fd_tree_subdir, "BB", 0, 0, &stx), 0);
+ ASSERT_FALSE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ /* Not allowed to move an attached mount to a detached mount. */
+ ASSERT_NE(move_mount(fd_tree_base, "", fd_tree_subdir, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ EXPECT_EQ(close(fd_tree_base), 0);
+ EXPECT_EQ(close(fd_tree_subdir), 0);
+}
+
+TEST_F(mount_setattr, attach_detached_mount_then_umount_then_close)
+{
+ int fd_tree = -EBADF;
+ struct statx stx;
+
+ fd_tree = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree, 0);
+
+ ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx), 0);
+ /* We copied with AT_RECURSIVE so /mnt/A must be a mountpoint. */
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ /* Attach the mount to the caller's mount namespace. */
+ ASSERT_EQ(move_mount(fd_tree, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(statx(-EBADF, "/tmp/target1", 0, 0, &stx), 0);
+ ASSERT_TRUE(stx.stx_attributes & STATX_ATTR_MOUNT_ROOT);
+
+ ASSERT_EQ(umount2("/tmp/target1", MNT_DETACH), 0);
+
+ /*
+ * This tests whether dissolve_on_fput() handles a NULL mount
+ * namespace correctly, i.e., that it doesn't splat.
+ */
+ EXPECT_EQ(close(fd_tree), 0);
+}
+
+TEST_F(mount_setattr, mount_detached1_onto_detached2_then_close_detached1_then_mount_detached2_onto_attached)
+{
+ int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
+
+ /*
+ * |-/mnt/A testing tmpfs
+ * `-/mnt/A/AA testing tmpfs
+ * `-/mnt/A/AA/B testing tmpfs
+ * `-/mnt/A/AA/B/BB testing tmpfs
+ */
+ fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree1, 0);
+
+ /*
+ * `-/mnt/B testing ramfs
+ */
+ fd_tree2 = sys_open_tree(-EBADF, "/mnt/B",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_EMPTY_PATH | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree2, 0);
+
+ /*
+ * Move the source detached mount tree to the target detached
+ * mount tree. This will move all the mounts in the source mount
+ * tree from the source anonymous mount namespace to the target
+ * anonymous mount namespace.
+ *
+ * The source detached mount tree and the target detached mount
+ * tree now both refer to the same anonymous mount namespace.
+ *
+ * |-"" testing ramfs
+ * `-"" testing tmpfs
+ * `-""/AA testing tmpfs
+ * `-""/AA/B testing tmpfs
+ * `-""/AA/B/BB testing tmpfs
+ */
+ ASSERT_EQ(move_mount(fd_tree1, "", fd_tree2, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+
+ /*
+ * The source detached mount tree @fd_tree1 is now an attached
+ * mount, i.e., it has a parent. Specifically, it now has the
+ * root mount of the mount tree of @fd_tree2 as its parent.
+ *
+ * That means we are no longer allowed to attach it as we only
+ * allow attaching the root of an anonymous mount tree, not
+ * random bits and pieces. Verify that the kernel enforces this.
+ */
+ ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ /*
+ * Closing the source detached mount tree must not unmount and
+ * free the shared anonymous mount namespace. The kernel will
+ * quickly yell at us because the anonymous mount namespace
+ * won't be empty when it's freed.
+ */
+ EXPECT_EQ(close(fd_tree1), 0);
+
+ /*
+ * Attach the mount tree to a non-anonymous mount namespace.
+ * This can only succeed if closing fd_tree1 had proper
+ * semantics and didn't cause the anonymous mount namespace to
+ * be freed. If it did this will trigger a UAF which will be
+ * visible on any KASAN enabled kernel.
+ *
+ * |-/tmp/target1 testing ramfs
+ * `-/tmp/target1 testing tmpfs
+ * `-/tmp/target1/AA testing tmpfs
+ * `-/tmp/target1/AA/B testing tmpfs
+ * `-/tmp/target1/AA/B/BB testing tmpfs
+ */
+ ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+ EXPECT_EQ(close(fd_tree2), 0);
+}
+
+TEST_F(mount_setattr, two_detached_mounts_referring_to_same_anonymous_mount_namespace)
+{
+ int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
+
+ /*
+ * Copy the following mount tree:
+ *
+ * |-/mnt/A testing tmpfs
+ * `-/mnt/A/AA testing tmpfs
+ * `-/mnt/A/AA/B testing tmpfs
+ * `-/mnt/A/AA/B/BB testing tmpfs
+ */
+ fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree1, 0);
+
+ /*
+ * Create an O_PATH file descriptors with a separate struct file
+ * that refers to the same detached mount tree as @fd_tree1
+ */
+ fd_tree2 = sys_open_tree(fd_tree1, "",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(fd_tree2, 0);
+
+ /*
+ * Copy the following mount tree:
+ *
+ * |-/tmp/target1 testing tmpfs
+ * `-/tmp/target1/AA testing tmpfs
+ * `-/tmp/target1/AA/B testing tmpfs
+ * `-/tmp/target1/AA/B/BB testing tmpfs
+ */
+ ASSERT_EQ(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ /*
+ * This must fail as this would mean adding the same mount tree
+ * into the same mount tree.
+ */
+ ASSERT_NE(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+}
+
+TEST_F(mount_setattr, two_detached_subtrees_of_same_anonymous_mount_namespace)
+{
+ int fd_tree1 = -EBADF, fd_tree2 = -EBADF;
+
+ /*
+ * Copy the following mount tree:
+ *
+ * |-/mnt/A testing tmpfs
+ * `-/mnt/A/AA testing tmpfs
+ * `-/mnt/A/AA/B testing tmpfs
+ * `-/mnt/A/AA/B/BB testing tmpfs
+ */
+ fd_tree1 = sys_open_tree(-EBADF, "/mnt/A",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree1, 0);
+
+ /*
+ * Create an O_PATH file descriptors with a separate struct file that
+ * refers to a subtree of the same detached mount tree as @fd_tree1
+ */
+ fd_tree2 = sys_open_tree(fd_tree1, "AA",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_EMPTY_PATH | OPEN_TREE_CLOEXEC);
+ ASSERT_GE(fd_tree2, 0);
+
+ /*
+ * This must fail as it is only possible to attach the root of a
+ * detached mount tree.
+ */
+ ASSERT_NE(move_mount(fd_tree2, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+
+ ASSERT_EQ(move_mount(fd_tree1, "", -EBADF, "/tmp/target1", MOVE_MOUNT_F_EMPTY_PATH), 0);
+}
+
+TEST_F(mount_setattr, detached_tree_propagation)
+{
+ int fd_tree = -EBADF;
+ struct statx stx1, stx2, stx3, stx4;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(mount(NULL, "/mnt", NULL, MS_REC | MS_SHARED, NULL), 0);
+
+ /*
+ * Copy the following mount tree:
+ *
+ * /mnt testing tmpfs
+ * |-/mnt/A testing tmpfs
+ * | `-/mnt/A/AA testing tmpfs
+ * | `-/mnt/A/AA/B testing tmpfs
+ * | `-/mnt/A/AA/B/BB testing tmpfs
+ * `-/mnt/B testing ramfs
+ */
+ fd_tree = sys_open_tree(-EBADF, "/mnt",
+ AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
+ AT_RECURSIVE | OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(fd_tree, 0);
+
+ ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx1), 0);
+ ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx2), 0);
+
+ /*
+ * Copying the mount namespace like done above doesn't alter the
+ * mounts in any way so the filesystem mounted on /mnt must be
+ * identical even though the mounts will differ. Use the device
+ * information to verify that. Note that tmpfs will have a 0
+ * major number so comparing the major number is misleading.
+ */
+ ASSERT_EQ(stx1.stx_dev_minor, stx2.stx_dev_minor);
+
+ /* Mount a tmpfs filesystem over /mnt/A. */
+ ASSERT_EQ(mount(NULL, "/mnt/A", "tmpfs", 0, NULL), 0);
+
+
+ ASSERT_EQ(statx(-EBADF, "/mnt/A", 0, 0, &stx3), 0);
+ ASSERT_EQ(statx(fd_tree, "A", 0, 0, &stx4), 0);
+
+ /*
+ * A new filesystem has been mounted on top of /mnt/A which
+ * means that the device information will be different for any
+ * statx() that was taken from /mnt/A before the mount compared
+ * to one after the mount.
+ *
+ * Since we already now that the device information between the
+ * stx1 and stx2 samples are identical we also now that stx2 and
+ * stx3 device information will necessarily differ.
+ */
+ ASSERT_NE(stx1.stx_dev_minor, stx3.stx_dev_minor);
+
+ /*
+ * If mount propagation worked correctly then the tmpfs mount
+ * that was created after the mount namespace was unshared will
+ * have propagated onto /mnt/A in the detached mount tree.
+ *
+ * Verify that the device information for stx3 and stx4 are
+ * identical. It is already established that stx3 is different
+ * from both stx1 and stx2 sampled before the tmpfs mount was
+ * done so if stx3 and stx4 are identical the proof is done.
+ */
+ ASSERT_EQ(stx3.stx_dev_minor, stx4.stx_dev_minor);
+
+ EXPECT_EQ(close(fd_tree), 0);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 73ee88d6b043..8f32b4f01aee 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -100,6 +100,7 @@ TEST_PROGS += vlan_bridge_binding.sh
TEST_PROGS += bpf_offload.py
TEST_PROGS += ipv6_route_update_soft_lockup.sh
TEST_PROGS += busy_poll_test.sh
+TEST_PROGS += lwt_dst_cache_ref_loop.sh
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller netlink-dumps
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 5b9baf708950..61e5116987f3 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -107,3 +107,5 @@ CONFIG_XFRM_INTERFACE=m
CONFIG_XFRM_USER=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IPV6_ILA=m
+CONFIG_IPV6_RPL_LWTUNNEL=y
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
index 3885a2a91f7d..baed5e380dae 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
@@ -20,6 +20,7 @@ ALL_TESTS="
test_port_range_ipv4_tcp
test_port_range_ipv6_udp
test_port_range_ipv6_tcp
+ test_port_range_ipv4_udp_drop
"
NUM_NETIFS=4
@@ -194,6 +195,51 @@ test_port_range_ipv6_tcp()
__test_port_range $proto $ip_proto $sip $dip $mode "$name"
}
+test_port_range_ipv4_udp_drop()
+{
+ local proto=ipv4
+ local ip_proto=udp
+ local sip=192.0.2.1
+ local dip=192.0.2.2
+ local mode="-4"
+ local name="IPv4 UDP Drop"
+ local dmac=$(mac_get $h2)
+ local smac=$(mac_get $h1)
+ local sport_min=2000
+ local sport_max=3000
+ local sport_mid=$((sport_min + (sport_max - sport_min) / 2))
+ local dport=5000
+
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \
+ flower src_ip $sip dst_ip $dip ip_proto $ip_proto \
+ src_port $sport_min-$sport_max \
+ dst_port $dport \
+ action drop
+
+ # Test ports outside range - should pass
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_min - 1)),dp=$dport"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_max + 1)),dp=$dport"
+
+ # Test ports inside range - should be dropped
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_min,dp=$dport"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_mid,dp=$dport"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_max,dp=$dport"
+
+ tc_check_packets "dev $swp1 ingress" 101 3
+ check_err $? "Filter did not drop the expected number of packets"
+
+ tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower
+
+ log_test "Port range matching - $name"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
index bc6b6762baf3..c22623b9a2a5 100644
--- a/tools/testing/selftests/net/lib/Makefile
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -9,7 +9,10 @@ TEST_FILES := ../../../../../Documentation/netlink/specs
TEST_FILES += ../../../../net/ynl
TEST_GEN_FILES += csum
+TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
TEST_INCLUDES := $(wildcard py/*.py sh/*.sh)
include ../../lib.mk
+
+include ../bpf.mk
diff --git a/tools/testing/selftests/net/lib/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
new file mode 100644
index 000000000000..e73fab3edd9f
--- /dev/null
+++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define KBUILD_MODNAME "xdp_dummy"
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+SEC("xdp.frags")
+int xdp_dummy_prog_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh
new file mode 100755
index 000000000000..881eb399798f
--- /dev/null
+++ b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh
@@ -0,0 +1,246 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Author: Justin Iurman <justin.iurman@uliege.be>
+#
+# WARNING
+# -------
+# This is just a dummy script that triggers encap cases with possible dst cache
+# reference loops in affected lwt users (see list below). Some cases are
+# pathological configurations for simplicity, others are valid. Overall, we
+# don't want this issue to happen, no matter what. In order to catch any
+# reference loops, kmemleak MUST be used. The results alone are always blindly
+# successful, don't rely on them. Note that the following tests may crash the
+# kernel if the fix to prevent lwtunnel_{input|output|xmit}() reentry loops is
+# not present.
+#
+# Affected lwt users so far (please update accordingly if needed):
+# - ila_lwt (output only)
+# - ioam6_iptunnel (output only)
+# - rpl_iptunnel (both input and output)
+# - seg6_iptunnel (both input and output)
+
+source lib.sh
+
+check_compatibility()
+{
+ setup_ns tmp_node &>/dev/null
+ if [ $? != 0 ]; then
+ echo "SKIP: Cannot create netns."
+ exit $ksft_skip
+ fi
+
+ ip link add name veth0 netns $tmp_node type veth \
+ peer name veth1 netns $tmp_node &>/dev/null
+ local ret=$?
+
+ ip -netns $tmp_node link set veth0 up &>/dev/null
+ ret=$((ret + $?))
+
+ ip -netns $tmp_node link set veth1 up &>/dev/null
+ ret=$((ret + $?))
+
+ if [ $ret != 0 ]; then
+ echo "SKIP: Cannot configure links."
+ cleanup_ns $tmp_node
+ exit $ksft_skip
+ fi
+
+ lsmod 2>/dev/null | grep -q "ila"
+ ila_lsmod=$?
+ [ $ila_lsmod != 0 ] && modprobe ila &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:1::/64 \
+ encap ila 1:2:3:4 csum-mode no-action ident-type luid \
+ hook-type output \
+ dev veth0 &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:2::/64 \
+ encap ioam6 trace prealloc type 0x800000 ns 0 size 4 \
+ dev veth0 &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:3::/64 \
+ encap rpl segs 2001:db8:3::1 dev veth0 &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:4::/64 \
+ encap seg6 mode inline segs 2001:db8:4::1 dev veth0 &>/dev/null
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ila"
+ skip_ila=$?
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ioam6"
+ skip_ioam6=$?
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap rpl"
+ skip_rpl=$?
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap seg6"
+ skip_seg6=$?
+
+ cleanup_ns $tmp_node
+}
+
+setup()
+{
+ setup_ns alpha beta gamma &>/dev/null
+
+ ip link add name veth-alpha netns $alpha type veth \
+ peer name veth-betaL netns $beta &>/dev/null
+
+ ip link add name veth-betaR netns $beta type veth \
+ peer name veth-gamma netns $gamma &>/dev/null
+
+ ip -netns $alpha link set veth-alpha name veth0 &>/dev/null
+ ip -netns $beta link set veth-betaL name veth0 &>/dev/null
+ ip -netns $beta link set veth-betaR name veth1 &>/dev/null
+ ip -netns $gamma link set veth-gamma name veth0 &>/dev/null
+
+ ip -netns $alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null
+ ip -netns $alpha link set veth0 up &>/dev/null
+ ip -netns $alpha link set lo up &>/dev/null
+ ip -netns $alpha route add 2001:db8:2::/64 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ ip -netns $beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null
+ ip -netns $beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null
+ ip -netns $beta link set veth0 up &>/dev/null
+ ip -netns $beta link set veth1 up &>/dev/null
+ ip -netns $beta link set lo up &>/dev/null
+ ip -netns $beta route del 2001:db8:2::/64
+ ip -netns $beta route add 2001:db8:2::/64 dev veth1
+ ip netns exec $beta \
+ sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null
+
+ ip -netns $gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null
+ ip -netns $gamma link set veth0 up &>/dev/null
+ ip -netns $gamma link set lo up &>/dev/null
+ ip -netns $gamma route add 2001:db8:1::/64 \
+ via 2001:db8:2::1 dev veth0 &>/dev/null
+
+ sleep 1
+
+ ip netns exec $alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null
+ if [ $? != 0 ]; then
+ echo "SKIP: Setup failed."
+ exit $ksft_skip
+ fi
+
+ sleep 1
+}
+
+cleanup()
+{
+ cleanup_ns $alpha $beta $gamma
+ [ $ila_lsmod != 0 ] && modprobe -r ila &>/dev/null
+}
+
+run_ila()
+{
+ if [ $skip_ila != 0 ]; then
+ echo "SKIP: ila (output)"
+ return
+ fi
+
+ ip -netns $beta route del 2001:db8:2::/64
+ ip -netns $beta route add 2001:db8:2:0:0:0:0:2/128 \
+ encap ila 2001:db8:2:0 csum-mode no-action ident-type luid \
+ hook-type output \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: ila (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+
+ ip -netns $beta route del 2001:db8:2:0:0:0:0:2/128
+ ip -netns $beta route add 2001:db8:2::/64 dev veth1
+ sleep 1
+}
+
+run_ioam6()
+{
+ if [ $skip_ioam6 != 0 ]; then
+ echo "SKIP: ioam6 (output)"
+ return
+ fi
+
+ ip -netns $beta route change 2001:db8:2::/64 \
+ encap ioam6 trace prealloc type 0x800000 ns 1 size 4 \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: ioam6 (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+}
+
+run_rpl()
+{
+ if [ $skip_rpl != 0 ]; then
+ echo "SKIP: rpl (input)"
+ echo "SKIP: rpl (output)"
+ return
+ fi
+
+ ip -netns $beta route change 2001:db8:2::/64 \
+ encap rpl segs 2001:db8:2::2 \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: rpl (input)"
+ ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+
+ echo "TEST: rpl (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+}
+
+run_seg6()
+{
+ if [ $skip_seg6 != 0 ]; then
+ echo "SKIP: seg6 (input)"
+ echo "SKIP: seg6 (output)"
+ return
+ fi
+
+ ip -netns $beta route change 2001:db8:2::/64 \
+ encap seg6 mode inline segs 2001:db8:2::2 \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: seg6 (input)"
+ ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+
+ echo "TEST: seg6 (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+}
+
+run()
+{
+ run_ila
+ run_ioam6
+ run_rpl
+ run_seg6
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+ echo "SKIP: Need root privileges."
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool."
+ exit $ksft_skip
+fi
+
+check_compatibility
+
+trap cleanup EXIT
+
+setup
+run
+
+exit $ksft_pass
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh
index c28379a965d8..1559ba275105 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh
@@ -13,6 +13,12 @@ source lib.sh
checktool "nft --version" "run test without nft tool"
+read t < /proc/sys/kernel/tainted
+if [ "$t" -ne 0 ];then
+ echo SKIP: kernel is tainted
+ exit $ksft_skip
+fi
+
cleanup() {
cleanup_all_ns
}
@@ -165,6 +171,7 @@ if [ "$t" -eq 0 ];then
echo PASS: kernel not tainted
else
echo ERROR: kernel is tainted
+ dmesg
ret=1
fi
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
index 6a764d70ab06..4788641717d9 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
@@ -4,6 +4,12 @@ source lib.sh
checktool "nft --version" "run test without nft tool"
+read t < /proc/sys/kernel/tainted
+if [ "$t" -ne 0 ];then
+ echo SKIP: kernel is tainted
+ exit $ksft_skip
+fi
+
cleanup() {
cleanup_all_ns
}
@@ -72,6 +78,7 @@ if [ "$t" -eq 0 ];then
echo PASS: kernel not tainted
else
echo ERROR: kernel is tainted
+ dmesg
exit 1
fi
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index 785e3875a6da..784d1b46912b 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -593,6 +593,7 @@ EOF
echo "PASS: queue program exiting while packets queued"
else
echo "TAINT: queue program exiting while packets queued"
+ dmesg
ret=1
fi
}
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 7d14a7c0cb62..58bcbbd029bc 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -47,6 +47,7 @@ XARCH_riscv = riscv64
XARCH = $(or $(XARCH_$(ARCH)),$(ARCH))
# map from user input variants to their kernel supported architectures
+ARCH_armthumb = arm
ARCH_ppc = powerpc
ARCH_ppc64 = powerpc
ARCH_ppc64le = powerpc
@@ -54,6 +55,7 @@ ARCH_mips32le = mips
ARCH_mips32be = mips
ARCH_riscv32 = riscv
ARCH_riscv64 = riscv
+ARCH_s390x = s390
ARCH := $(or $(ARCH_$(XARCH)),$(XARCH))
# kernel image names by architecture
@@ -62,6 +64,7 @@ IMAGE_x86_64 = arch/x86/boot/bzImage
IMAGE_x86 = arch/x86/boot/bzImage
IMAGE_arm64 = arch/arm64/boot/Image
IMAGE_arm = arch/arm/boot/zImage
+IMAGE_armthumb = arch/arm/boot/zImage
IMAGE_mips32le = vmlinuz
IMAGE_mips32be = vmlinuz
IMAGE_ppc = vmlinux
@@ -70,6 +73,7 @@ IMAGE_ppc64le = arch/powerpc/boot/zImage
IMAGE_riscv = arch/riscv/boot/Image
IMAGE_riscv32 = arch/riscv/boot/Image
IMAGE_riscv64 = arch/riscv/boot/Image
+IMAGE_s390x = arch/s390/boot/bzImage
IMAGE_s390 = arch/s390/boot/bzImage
IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi
IMAGE = $(objtree)/$(IMAGE_$(XARCH))
@@ -81,19 +85,20 @@ DEFCONFIG_x86_64 = defconfig
DEFCONFIG_x86 = defconfig
DEFCONFIG_arm64 = defconfig
DEFCONFIG_arm = multi_v7_defconfig
+DEFCONFIG_armthumb = multi_v7_defconfig
DEFCONFIG_mips32le = malta_defconfig
-DEFCONFIG_mips32be = malta_defconfig
+DEFCONFIG_mips32be = malta_defconfig generic/eb.config
DEFCONFIG_ppc = pmac32_defconfig
DEFCONFIG_ppc64 = powernv_be_defconfig
DEFCONFIG_ppc64le = powernv_defconfig
DEFCONFIG_riscv = defconfig
DEFCONFIG_riscv32 = rv32_defconfig
DEFCONFIG_riscv64 = defconfig
-DEFCONFIG_s390 = defconfig
+DEFCONFIG_s390x = defconfig
+DEFCONFIG_s390 = defconfig compat.config
DEFCONFIG_loongarch = defconfig
DEFCONFIG = $(DEFCONFIG_$(XARCH))
-EXTRACONFIG_mips32be = -d CONFIG_CPU_LITTLE_ENDIAN -e CONFIG_CPU_BIG_ENDIAN
EXTRACONFIG = $(EXTRACONFIG_$(XARCH))
# optional tests to run (default = all)
@@ -105,6 +110,7 @@ QEMU_ARCH_x86_64 = x86_64
QEMU_ARCH_x86 = x86_64
QEMU_ARCH_arm64 = aarch64
QEMU_ARCH_arm = arm
+QEMU_ARCH_armthumb = arm
QEMU_ARCH_mips32le = mipsel # works with malta_defconfig
QEMU_ARCH_mips32be = mips
QEMU_ARCH_ppc = ppc
@@ -113,6 +119,7 @@ QEMU_ARCH_ppc64le = ppc64
QEMU_ARCH_riscv = riscv64
QEMU_ARCH_riscv32 = riscv32
QEMU_ARCH_riscv64 = riscv64
+QEMU_ARCH_s390x = s390x
QEMU_ARCH_s390 = s390x
QEMU_ARCH_loongarch = loongarch64
QEMU_ARCH = $(QEMU_ARCH_$(XARCH))
@@ -133,6 +140,7 @@ QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(
QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_armthumb = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
@@ -141,6 +149,7 @@ QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC
QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_riscv32 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA)
@@ -156,15 +165,18 @@ Q=@
endif
CFLAGS_i386 = $(call cc-option,-m32)
+CFLAGS_arm = -marm
+CFLAGS_armthumb = -mthumb -march=armv6t2
CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2)
-CFLAGS_s390 = -m64
+CFLAGS_s390x = -m64
+CFLAGS_s390 = -m31
CFLAGS_mips32le = -EL -mabi=32 -fPIC
CFLAGS_mips32be = -EB -mabi=32
CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all))
CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \
- $(call cc-option,-fno-stack-protector) \
+ $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \
$(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_EXTRA)
LDFLAGS :=
@@ -220,7 +232,7 @@ all: run
sysroot: sysroot/$(ARCH)/include
-sysroot/$(ARCH)/include:
+sysroot/$(ARCH)/include: | defconfig
$(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot
$(QUIET_MKDIR)mkdir -p sysroot
$(Q)$(MAKE) -C $(srctree) outputmakefile
@@ -264,16 +276,16 @@ initramfs: nolibc-test
$(Q)cp nolibc-test initramfs/init
defconfig:
- $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) mrproper $(DEFCONFIG) prepare
+ $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(DEFCONFIG)
$(Q)if [ -n "$(EXTRACONFIG)" ]; then \
$(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \
$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \
fi
-kernel:
+kernel: | defconfig
$(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null
-kernel-standalone: initramfs
+kernel-standalone: initramfs | defconfig
$(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null
# run the tests after building the kernel
diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.c b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
index 5ff4c8a1db2a..a7ca8325863f 100644
--- a/tools/testing/selftests/nolibc/nolibc-test-linkage.c
+++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
@@ -11,16 +11,16 @@ void *linkage_test_errno_addr(void)
return &errno;
}
-int linkage_test_constructor_test_value;
+int linkage_test_constructor_test_value = 0;
__attribute__((constructor))
static void constructor1(void)
{
- linkage_test_constructor_test_value = 2;
+ linkage_test_constructor_test_value |= 1 << 0;
}
__attribute__((constructor))
static void constructor2(void)
{
- linkage_test_constructor_test_value *= 3;
+ linkage_test_constructor_test_value |= 1 << 1;
}
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 0e0e3b48a8c3..5884a891c491 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -43,6 +43,8 @@
#endif
#endif
+#pragma GCC diagnostic ignored "-Wmissing-prototypes"
+
#include "nolibc-test-linkage.h"
/* for the type of int_fast16_t and int_fast32_t, musl differs from glibc and nolibc */
@@ -690,14 +692,14 @@ int expect_strtox(int llen, void *func, const char *input, int base, intmax_t ex
__attribute__((constructor))
static void constructor1(void)
{
- constructor_test_value = 1;
+ constructor_test_value |= 1 << 0;
}
__attribute__((constructor))
static void constructor2(int argc, char **argv, char **envp)
{
if (argc && argv && envp)
- constructor_test_value *= 2;
+ constructor_test_value |= 1 << 1;
}
int run_startup(int min, int max)
@@ -736,9 +738,9 @@ int run_startup(int min, int max)
CASE_TEST(environ_HOME); EXPECT_PTRNZ(1, getenv("HOME")); break;
CASE_TEST(auxv_addr); EXPECT_PTRGT(test_auxv != (void *)-1, test_auxv, brk); break;
CASE_TEST(auxv_AT_UID); EXPECT_EQ(1, getauxval(AT_UID), getuid()); break;
- CASE_TEST(constructor); EXPECT_EQ(1, constructor_test_value, 2); break;
+ CASE_TEST(constructor); EXPECT_EQ(is_nolibc, constructor_test_value, 0x3); break;
CASE_TEST(linkage_errno); EXPECT_PTREQ(1, linkage_test_errno_addr(), &errno); break;
- CASE_TEST(linkage_constr); EXPECT_EQ(1, linkage_test_constructor_test_value, 6); break;
+ CASE_TEST(linkage_constr); EXPECT_EQ(1, linkage_test_constructor_test_value, 0x3); break;
case __LINE__:
return ret; /* must be last */
/* note: do not set any defaults so as to permit holes above */
@@ -767,6 +769,44 @@ int test_getdents64(const char *dir)
return ret;
}
+static int test_dirent(void)
+{
+ int comm = 0, cmdline = 0;
+ struct dirent dirent, *result;
+ DIR *dir;
+ int ret;
+
+ dir = opendir("/proc/self");
+ if (!dir)
+ return 1;
+
+ while (1) {
+ errno = 0;
+ ret = readdir_r(dir, &dirent, &result);
+ if (ret != 0)
+ return 1;
+ if (!result)
+ break;
+
+ if (strcmp(dirent.d_name, "comm") == 0)
+ comm++;
+ else if (strcmp(dirent.d_name, "cmdline") == 0)
+ cmdline++;
+ }
+
+ if (errno)
+ return 1;
+
+ ret = closedir(dir);
+ if (ret)
+ return 1;
+
+ if (comm != 1 || cmdline != 1)
+ return 1;
+
+ return 0;
+}
+
int test_getpagesize(void)
{
int x = getpagesize();
@@ -988,6 +1028,22 @@ int test_rlimit(void)
return 0;
}
+int test_openat(void)
+{
+ int dev, null;
+
+ dev = openat(AT_FDCWD, "/dev", O_DIRECTORY);
+ if (dev < 0)
+ return -1;
+
+ null = openat(dev, "null", O_RDONLY);
+ close(dev);
+ if (null < 0)
+ return -1;
+
+ close(null);
+ return 0;
+}
/* Run syscall tests between IDs <min> and <max>.
* Return 0 on success, non-zero on failure.
@@ -1059,6 +1115,7 @@ int run_syscall(int min, int max)
CASE_TEST(fork); EXPECT_SYSZR(1, test_fork()); break;
CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break;
CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break;
+ CASE_TEST(directories); EXPECT_SYSZR(proc, test_dirent()); break;
CASE_TEST(gettimeofday_tv); EXPECT_SYSZR(1, gettimeofday(&tv, NULL)); break;
CASE_TEST(gettimeofday_tv_tz);EXPECT_SYSZR(1, gettimeofday(&tv, &tz)); break;
CASE_TEST(getpagesize); EXPECT_SYSZR(1, test_getpagesize()); break;
@@ -1073,8 +1130,9 @@ int run_syscall(int min, int max)
CASE_TEST(mmap_bad); EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break;
CASE_TEST(munmap_bad); EXPECT_SYSER(1, munmap(NULL, 0), -1, EINVAL); break;
CASE_TEST(mmap_munmap_good); EXPECT_SYSZR(1, test_mmap_munmap()); break;
- CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break;
- CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break;
+ CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", O_RDONLY), -1); if (tmp != -1) close(tmp); break;
+ CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", O_RDONLY), -1, ENOENT); if (tmp != -1) close(tmp); break;
+ CASE_TEST(openat_dir); EXPECT_SYSZR(1, test_openat()); break;
CASE_TEST(pipe); EXPECT_SYSZR(1, test_pipe()); break;
CASE_TEST(poll_null); EXPECT_SYSZR(1, poll(NULL, 0, 0)); break;
CASE_TEST(poll_stdout); EXPECT_SYSNE(1, ({ struct pollfd fds = { 1, POLLOUT, 0}; poll(&fds, 1, 0); }), -1); break;
@@ -1284,6 +1342,73 @@ static int expect_vfprintf(int llen, int c, const char *expected, const char *fm
return ret;
}
+static int test_scanf(void)
+{
+ unsigned long long ull;
+ unsigned long ul;
+ unsigned int u;
+ long long ll;
+ long l;
+ void *p;
+ int i;
+
+ /* return __LINE__ to point to the specific failure */
+
+ /* test EOF */
+ if (sscanf("", "foo") != EOF)
+ return __LINE__;
+
+ /* test simple literal without placeholder */
+ if (sscanf("foo", "foo") != 0)
+ return __LINE__;
+
+ /* test single placeholder */
+ if (sscanf("123", "%d", &i) != 1)
+ return __LINE__;
+
+ if (i != 123)
+ return __LINE__;
+
+ /* test multiple place holders and separators */
+ if (sscanf("a123b456c0x90", "a%db%uc%p", &i, &u, &p) != 3)
+ return __LINE__;
+
+ if (i != 123)
+ return __LINE__;
+
+ if (u != 456)
+ return __LINE__;
+
+ if (p != (void *)0x90)
+ return __LINE__;
+
+ /* test space handling */
+ if (sscanf("a b1", "a b%d", &i) != 1)
+ return __LINE__;
+
+ if (i != 1)
+ return __LINE__;
+
+ /* test literal percent */
+ if (sscanf("a%1", "a%%%d", &i) != 1)
+ return __LINE__;
+
+ if (i != 1)
+ return __LINE__;
+
+ /* test stdint.h types */
+ if (sscanf("1|2|3|4|5|6",
+ "%d|%ld|%lld|%u|%lu|%llu",
+ &i, &l, &ll, &u, &ul, &ull) != 6)
+ return __LINE__;
+
+ if (i != 1 || l != 2 || ll != 3 ||
+ u != 4 || ul != 5 || ull != 6)
+ return __LINE__;
+
+ return 0;
+}
+
static int run_vfprintf(int min, int max)
{
int test;
@@ -1305,6 +1430,7 @@ static int run_vfprintf(int min, int max)
CASE_TEST(char); EXPECT_VFPRINTF(1, "c", "%c", 'c'); break;
CASE_TEST(hex); EXPECT_VFPRINTF(1, "f", "%x", 0xf); break;
CASE_TEST(pointer); EXPECT_VFPRINTF(3, "0x1", "%p", (void *) 0x1); break;
+ CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break;
case __LINE__:
return ret; /* must be last */
/* note: do not set any defaults so as to permit holes above */
diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh
index 9c5160c53881..0299a0912d40 100755
--- a/tools/testing/selftests/nolibc/run-tests.sh
+++ b/tools/testing/selftests/nolibc/run-tests.sh
@@ -17,7 +17,16 @@ perform_download=0
test_mode=system
werror=1
llvm=
-archs="i386 x86_64 arm64 arm mips32le mips32be ppc ppc64 ppc64le riscv32 riscv64 s390 loongarch"
+all_archs=(
+ i386 x86_64
+ arm64 arm armthumb
+ mips32le mips32be
+ ppc ppc64 ppc64le
+ riscv32 riscv64
+ s390x s390
+ loongarch
+)
+archs="${all_archs[@]}"
TEMP=$(getopt -o 'j:d:c:b:a:m:pelh' -n "$0" -- "$@")
@@ -94,19 +103,21 @@ fi
crosstool_arch() {
case "$1" in
arm64) echo aarch64;;
+ armthumb) echo arm;;
ppc) echo powerpc;;
ppc64) echo powerpc64;;
ppc64le) echo powerpc64;;
riscv) echo riscv64;;
loongarch) echo loongarch64;;
mips*) echo mips;;
+ s390*) echo s390;;
*) echo "$1";;
esac
}
crosstool_abi() {
case "$1" in
- arm) echo linux-gnueabi;;
+ arm | armthumb) echo linux-gnueabi;;
*) echo linux;;
esac
}
@@ -157,10 +168,6 @@ test_arch() {
fi
MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}")
- mkdir -p "$build_dir"
- if [ "$test_mode" = "system" ] && [ ! -f "${build_dir}/.config" ]; then
- swallow_output "${MAKE[@]}" defconfig
- fi
case "$test_mode" in
'system')
test_target=run
@@ -173,6 +180,13 @@ test_arch() {
exit 1
esac
printf '%-15s' "$arch:"
+ if [ "$arch" = "s390" ] && ([ "$llvm" = "1" ] || [ "$test_mode" = "user" ]); then
+ echo "Unsupported configuration"
+ return
+ fi
+
+ mkdir -p "$build_dir"
+ swallow_output "${MAKE[@]}" defconfig
swallow_output "${MAKE[@]}" CFLAGS_EXTRA="$CFLAGS_EXTRA" "$test_target" V=1
cp run.out run.out."${arch}"
"${MAKE[@]}" report | grep passed
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index bf92481f925c..0406a065deb4 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -8,3 +8,5 @@ pidfd_getfd_test
pidfd_setns_test
pidfd_file_handle_test
pidfd_bind_mount
+pidfd_info_test
+pidfd_exec_helper
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
index 301343a11b62..fcbefc0d77f6 100644
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -3,7 +3,9 @@ CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall
TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \
- pidfd_file_handle_test pidfd_bind_mount
+ pidfd_file_handle_test pidfd_bind_mount pidfd_info_test
+
+TEST_GEN_PROGS_EXTENDED := pidfd_exec_helper
include ../lib.mk
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index 0b96ac4b8ce5..cec22aa11cdf 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -12,6 +12,7 @@
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
+#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/wait.h>
@@ -50,6 +51,107 @@
#define PIDFD_NONBLOCK O_NONBLOCK
#endif
+#ifndef PIDFD_SELF_THREAD
+#define PIDFD_SELF_THREAD -10000 /* Current thread. */
+#endif
+
+#ifndef PIDFD_SELF_THREAD_GROUP
+#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */
+#endif
+
+#ifndef PIDFD_SELF
+#define PIDFD_SELF PIDFD_SELF_THREAD
+#endif
+
+#ifndef PIDFD_SELF_PROCESS
+#define PIDFD_SELF_PROCESS PIDFD_SELF_THREAD_GROUP
+#endif
+
+#ifndef PIDFS_IOCTL_MAGIC
+#define PIDFS_IOCTL_MAGIC 0xFF
+#endif
+
+#ifndef PIDFD_GET_CGROUP_NAMESPACE
+#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1)
+#endif
+
+#ifndef PIDFD_GET_IPC_NAMESPACE
+#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2)
+#endif
+
+#ifndef PIDFD_GET_MNT_NAMESPACE
+#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3)
+#endif
+
+#ifndef PIDFD_GET_NET_NAMESPACE
+#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4)
+#endif
+
+#ifndef PIDFD_GET_PID_NAMESPACE
+#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5)
+#endif
+
+#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE
+#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6)
+#endif
+
+#ifndef PIDFD_GET_TIME_NAMESPACE
+#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7)
+#endif
+
+#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE
+#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
+#endif
+
+#ifndef PIDFD_GET_USER_NAMESPACE
+#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
+#endif
+
+#ifndef PIDFD_GET_UTS_NAMESPACE
+#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
+#endif
+
+#ifndef PIDFD_GET_INFO
+#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
+#endif
+
+#ifndef PIDFD_INFO_PID
+#define PIDFD_INFO_PID (1UL << 0) /* Always returned, even if not requested */
+#endif
+
+#ifndef PIDFD_INFO_CREDS
+#define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */
+#endif
+
+#ifndef PIDFD_INFO_CGROUPID
+#define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */
+#endif
+
+#ifndef PIDFD_INFO_EXIT
+#define PIDFD_INFO_EXIT (1UL << 3) /* Always returned if available, even if not requested */
+#endif
+
+#ifndef PIDFD_THREAD
+#define PIDFD_THREAD O_EXCL
+#endif
+
+struct pidfd_info {
+ __u64 mask;
+ __u64 cgroupid;
+ __u32 pid;
+ __u32 tgid;
+ __u32 ppid;
+ __u32 ruid;
+ __u32 rgid;
+ __u32 euid;
+ __u32 egid;
+ __u32 suid;
+ __u32 sgid;
+ __u32 fsuid;
+ __u32 fsgid;
+ __s32 exit_code;
+};
+
/*
* The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c
* That means, when it wraps around any pid < 300 will be skipped.
@@ -152,4 +254,11 @@ static inline ssize_t write_nointr(int fd, const void *buf, size_t count)
return ret;
}
+static inline int sys_execveat(int dirfd, const char *pathname,
+ char *const argv[], char *const envp[],
+ int flags)
+{
+ return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
+}
+
#endif /* __PIDFD_H */
diff --git a/tools/testing/selftests/pidfd/pidfd_exec_helper.c b/tools/testing/selftests/pidfd/pidfd_exec_helper.c
new file mode 100644
index 000000000000..5516808c95f2
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_exec_helper.c
@@ -0,0 +1,12 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int main(int argc, char *argv[])
+{
+ if (pause())
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+}
diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
index f062a986e382..f718aac75068 100644
--- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
@@ -13,6 +13,7 @@
#include <syscall.h>
#include <sys/wait.h>
#include <sys/mman.h>
+#include <sys/mount.h>
#include "pidfd.h"
#include "../kselftest.h"
diff --git a/tools/testing/selftests/pidfd/pidfd_info_test.c b/tools/testing/selftests/pidfd/pidfd_info_test.c
new file mode 100644
index 000000000000..1758a1b0457b
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_info_test.c
@@ -0,0 +1,692 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <poll.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <linux/kcmp.h>
+#include <sys/stat.h>
+
+#include "pidfd.h"
+#include "../kselftest_harness.h"
+
+FIXTURE(pidfd_info)
+{
+ pid_t child_pid1;
+ int child_pidfd1;
+
+ pid_t child_pid2;
+ int child_pidfd2;
+
+ pid_t child_pid3;
+ int child_pidfd3;
+
+ pid_t child_pid4;
+ int child_pidfd4;
+};
+
+FIXTURE_SETUP(pidfd_info)
+{
+ int ret;
+ int ipc_sockets[2];
+ char c;
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ self->child_pid1 = create_child(&self->child_pidfd1, 0);
+ EXPECT_GE(self->child_pid1, 0);
+
+ if (self->child_pid1 == 0) {
+ close(ipc_sockets[0]);
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ _exit(EXIT_FAILURE);
+
+ close(ipc_sockets[1]);
+
+ pause();
+ _exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ /* SIGKILL but don't reap. */
+ EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0), 0);
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ self->child_pid2 = create_child(&self->child_pidfd2, 0);
+ EXPECT_GE(self->child_pid2, 0);
+
+ if (self->child_pid2 == 0) {
+ close(ipc_sockets[0]);
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ _exit(EXIT_FAILURE);
+
+ close(ipc_sockets[1]);
+
+ pause();
+ _exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ /* SIGKILL and reap. */
+ EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0), 0);
+ EXPECT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0);
+
+ self->child_pid3 = create_child(&self->child_pidfd3, CLONE_NEWUSER | CLONE_NEWPID);
+ EXPECT_GE(self->child_pid3, 0);
+
+ if (self->child_pid3 == 0)
+ _exit(EXIT_SUCCESS);
+
+ self->child_pid4 = create_child(&self->child_pidfd4, CLONE_NEWUSER | CLONE_NEWPID);
+ EXPECT_GE(self->child_pid4, 0);
+
+ if (self->child_pid4 == 0)
+ _exit(EXIT_SUCCESS);
+
+ EXPECT_EQ(sys_waitid(P_PID, self->child_pid4, NULL, WEXITED), 0);
+}
+
+FIXTURE_TEARDOWN(pidfd_info)
+{
+ sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0);
+ if (self->child_pidfd1 >= 0)
+ EXPECT_EQ(0, close(self->child_pidfd1));
+
+ sys_waitid(P_PID, self->child_pid1, NULL, WEXITED);
+
+ sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0);
+ if (self->child_pidfd2 >= 0)
+ EXPECT_EQ(0, close(self->child_pidfd2));
+
+ sys_waitid(P_PID, self->child_pid2, NULL, WEXITED);
+ sys_waitid(P_PID, self->child_pid3, NULL, WEXITED);
+ sys_waitid(P_PID, self->child_pid4, NULL, WEXITED);
+}
+
+TEST_F(pidfd_info, sigkill_exit)
+{
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID,
+ };
+
+ /* Process has exited but not been reaped so this must work. */
+ ASSERT_EQ(ioctl(self->child_pidfd1, PIDFD_GET_INFO, &info), 0);
+
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(self->child_pidfd1, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+ /* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+}
+
+TEST_F(pidfd_info, sigkill_reaped)
+{
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID,
+ };
+
+ /* Process has already been reaped and PIDFD_INFO_EXIT hasn't been set. */
+ ASSERT_NE(ioctl(self->child_pidfd2, PIDFD_GET_INFO, &info), 0);
+ ASSERT_EQ(errno, ESRCH);
+
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(self->child_pidfd2, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+ ASSERT_TRUE(WIFSIGNALED(info.exit_code));
+ ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
+}
+
+TEST_F(pidfd_info, success_exit)
+{
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID,
+ };
+
+ /* Process has exited but not been reaped so this must work. */
+ ASSERT_EQ(ioctl(self->child_pidfd3, PIDFD_GET_INFO, &info), 0);
+
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(self->child_pidfd3, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+ /* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+}
+
+TEST_F(pidfd_info, success_reaped)
+{
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID,
+ };
+
+ /* Process has already been reaped and PIDFD_INFO_EXIT hasn't been set. */
+ ASSERT_NE(ioctl(self->child_pidfd4, PIDFD_GET_INFO, &info), 0);
+ ASSERT_EQ(errno, ESRCH);
+
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(self->child_pidfd4, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+ ASSERT_TRUE(WIFEXITED(info.exit_code));
+ ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
+}
+
+TEST_F(pidfd_info, success_reaped_poll)
+{
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
+ };
+ struct pollfd fds = {};
+ int nevents;
+
+ fds.events = POLLIN;
+ fds.fd = self->child_pidfd2;
+
+ nevents = poll(&fds, 1, -1);
+ ASSERT_EQ(nevents, 1);
+ ASSERT_TRUE(!!(fds.revents & POLLIN));
+ ASSERT_TRUE(!!(fds.revents & POLLHUP));
+
+ ASSERT_EQ(ioctl(self->child_pidfd2, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+ ASSERT_TRUE(WIFSIGNALED(info.exit_code));
+ ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
+}
+
+static void *pidfd_info_pause_thread(void *arg)
+{
+ pid_t pid_thread = gettid();
+ int ipc_socket = *(int *)arg;
+
+ /* Inform the grand-parent what the tid of this thread is. */
+ if (write_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+ return NULL;
+
+ close(ipc_socket);
+
+ /* Sleep untill we're killed. */
+ pause();
+ return NULL;
+}
+
+TEST_F(pidfd_info, thread_group)
+{
+ pid_t pid_leader, pid_poller, pid_thread;
+ pthread_t thread;
+ int nevents, pidfd_leader, pidfd_thread, pidfd_leader_thread, ret;
+ int ipc_sockets[2];
+ struct pollfd fds = {};
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
+ }, info2;
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ pid_leader = create_child(&pidfd_leader, 0);
+ EXPECT_GE(pid_leader, 0);
+
+ if (pid_leader == 0) {
+ close(ipc_sockets[0]);
+
+ /* The thread will outlive the thread-group leader. */
+ if (pthread_create(&thread, NULL, pidfd_info_pause_thread, &ipc_sockets[1]))
+ syscall(__NR_exit, EXIT_FAILURE);
+
+ /* Make the thread-group leader exit prematurely. */
+ syscall(__NR_exit, EXIT_SUCCESS);
+ }
+
+ /*
+ * Opening a PIDFD_THREAD aka thread-specific pidfd based on a
+ * thread-group leader must succeed.
+ */
+ pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
+ ASSERT_GE(pidfd_leader_thread, 0);
+
+ pid_poller = fork();
+ ASSERT_GE(pid_poller, 0);
+ if (pid_poller == 0) {
+ /*
+ * We can't poll and wait for the old thread-group
+ * leader to exit using a thread-specific pidfd. The
+ * thread-group leader exited prematurely and
+ * notification is delayed until all subthreads have
+ * exited.
+ */
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader_thread;
+ nevents = poll(&fds, 1, 10000 /* wait 5 seconds */);
+ if (nevents != 0)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLIN)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLHUP)
+ _exit(EXIT_FAILURE);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Retrieve the tid of the thread. */
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ /* Opening a thread as a thread-group leader must fail. */
+ pidfd_thread = sys_pidfd_open(pid_thread, 0);
+ ASSERT_LT(pidfd_thread, 0);
+
+ /* Opening a thread as a PIDFD_THREAD must succeed. */
+ pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
+ ASSERT_GE(pidfd_thread, 0);
+
+ ASSERT_EQ(wait_for_pid(pid_poller), 0);
+
+ /*
+ * Note that pidfd_leader is a thread-group pidfd, so polling on it
+ * would only notify us once all thread in the thread-group have
+ * exited. So we can't poll before we have taken down the whole
+ * thread-group.
+ */
+
+ /* Get PIDFD_GET_INFO using the thread-group leader pidfd. */
+ ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+ /* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+ ASSERT_EQ(info.pid, pid_leader);
+
+ /*
+ * Now retrieve the same info using the thread specific pidfd
+ * for the thread-group leader.
+ */
+ info2.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader_thread, PIDFD_GET_INFO, &info2), 0);
+ ASSERT_TRUE(!!(info2.mask & PIDFD_INFO_CREDS));
+ /* Process has exited but not been reaped, so no PIDFD_INFO_EXIT information yet. */
+ ASSERT_FALSE(!!(info2.mask & PIDFD_INFO_EXIT));
+ ASSERT_EQ(info2.pid, pid_leader);
+
+ /* Now try the thread-specific pidfd. */
+ ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+ /* The thread hasn't exited, so no PIDFD_INFO_EXIT information yet. */
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+ ASSERT_EQ(info.pid, pid_thread);
+
+ /*
+ * Take down the whole thread-group. The thread-group leader
+ * exited successfully but the thread will now be SIGKILLed.
+ * This must be reflected in the recorded exit information.
+ */
+ EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0);
+ EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0);
+
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader;
+ nevents = poll(&fds, 1, -1);
+ ASSERT_EQ(nevents, 1);
+ ASSERT_TRUE(!!(fds.revents & POLLIN));
+ /* The thread-group leader has been reaped. */
+ ASSERT_TRUE(!!(fds.revents & POLLHUP));
+
+ /*
+ * Retrieve exit information for the thread-group leader via the
+ * thread-group leader pidfd.
+ */
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+ /* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */
+ ASSERT_TRUE(WIFEXITED(info.exit_code));
+ ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
+
+ /*
+ * Retrieve exit information for the thread-group leader via the
+ * thread-specific pidfd.
+ */
+ info2.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader_thread, PIDFD_GET_INFO, &info2), 0);
+ ASSERT_FALSE(!!(info2.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info2.mask & PIDFD_INFO_EXIT));
+
+ /* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */
+ ASSERT_TRUE(WIFEXITED(info2.exit_code));
+ ASSERT_EQ(WEXITSTATUS(info2.exit_code), 0);
+
+ /* Retrieve exit information for the thread. */
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+
+ /* The thread got SIGKILLed. */
+ ASSERT_TRUE(WIFSIGNALED(info.exit_code));
+ ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL);
+
+ EXPECT_EQ(close(pidfd_leader), 0);
+ EXPECT_EQ(close(pidfd_thread), 0);
+}
+
+static void *pidfd_info_thread_exec(void *arg)
+{
+ pid_t pid_thread = gettid();
+ int ipc_socket = *(int *)arg;
+
+ /* Inform the grand-parent what the tid of this thread is. */
+ if (write_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+ return NULL;
+
+ if (read_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+ return NULL;
+
+ close(ipc_socket);
+
+ sys_execveat(AT_FDCWD, "pidfd_exec_helper", NULL, NULL, 0);
+ return NULL;
+}
+
+TEST_F(pidfd_info, thread_group_exec)
+{
+ pid_t pid_leader, pid_poller, pid_thread;
+ pthread_t thread;
+ int nevents, pidfd_leader, pidfd_leader_thread, pidfd_thread, ret;
+ int ipc_sockets[2];
+ struct pollfd fds = {};
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
+ };
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ pid_leader = create_child(&pidfd_leader, 0);
+ EXPECT_GE(pid_leader, 0);
+
+ if (pid_leader == 0) {
+ close(ipc_sockets[0]);
+
+ /* The thread will outlive the thread-group leader. */
+ if (pthread_create(&thread, NULL, pidfd_info_thread_exec, &ipc_sockets[1]))
+ syscall(__NR_exit, EXIT_FAILURE);
+
+ /* Make the thread-group leader exit prematurely. */
+ syscall(__NR_exit, EXIT_SUCCESS);
+ }
+
+ /* Open a thread-specific pidfd for the thread-group leader. */
+ pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
+ ASSERT_GE(pidfd_leader_thread, 0);
+
+ pid_poller = fork();
+ ASSERT_GE(pid_poller, 0);
+ if (pid_poller == 0) {
+ /*
+ * We can't poll and wait for the old thread-group
+ * leader to exit using a thread-specific pidfd. The
+ * thread-group leader exited prematurely and
+ * notification is delayed until all subthreads have
+ * exited.
+ *
+ * When the thread has execed it will taken over the old
+ * thread-group leaders struct pid. Calling poll after
+ * the thread execed will thus block again because a new
+ * thread-group has started.
+ */
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader_thread;
+ nevents = poll(&fds, 1, 10000 /* wait 5 seconds */);
+ if (nevents != 0)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLIN)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLHUP)
+ _exit(EXIT_FAILURE);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Retrieve the tid of the thread. */
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+
+ /* Opening a thread as a PIDFD_THREAD must succeed. */
+ pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
+ ASSERT_GE(pidfd_thread, 0);
+
+ /* Now that we've opened a thread-specific pidfd the thread can exec. */
+ ASSERT_EQ(write_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+
+ ASSERT_EQ(wait_for_pid(pid_poller), 0);
+
+ /* Wait until the kernel has SIGKILLed the thread. */
+ fds.events = POLLHUP;
+ fds.fd = pidfd_thread;
+ nevents = poll(&fds, 1, -1);
+ ASSERT_EQ(nevents, 1);
+ /* The thread has been reaped. */
+ ASSERT_TRUE(!!(fds.revents & POLLHUP));
+
+ /* Retrieve thread-specific exit info from pidfd. */
+ ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+ /*
+ * While the kernel will have SIGKILLed the whole thread-group
+ * during exec it will cause the individual threads to exit
+ * cleanly.
+ */
+ ASSERT_TRUE(WIFEXITED(info.exit_code));
+ ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
+
+ /*
+ * The thread-group leader is still alive, the thread has taken
+ * over its struct pid and thus its pid number.
+ */
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+ ASSERT_EQ(info.pid, pid_leader);
+
+ /* Take down the thread-group leader. */
+ EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0);
+
+ /*
+ * Afte the exec we're dealing with an empty thread-group so now
+ * we must see an exit notification on the thread-specific pidfd
+ * for the thread-group leader as there's no subthread that can
+ * revive the struct pid.
+ */
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader_thread;
+ nevents = poll(&fds, 1, -1);
+ ASSERT_EQ(nevents, 1);
+ ASSERT_TRUE(!!(fds.revents & POLLIN));
+ ASSERT_FALSE(!!(fds.revents & POLLHUP));
+
+ EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0);
+
+ /* Retrieve exit information for the thread-group leader. */
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+
+ EXPECT_EQ(close(pidfd_leader), 0);
+ EXPECT_EQ(close(pidfd_thread), 0);
+}
+
+static void *pidfd_info_thread_exec_sane(void *arg)
+{
+ pid_t pid_thread = gettid();
+ int ipc_socket = *(int *)arg;
+
+ /* Inform the grand-parent what the tid of this thread is. */
+ if (write_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+ return NULL;
+
+ if (read_nointr(ipc_socket, &pid_thread, sizeof(pid_thread)) != sizeof(pid_thread))
+ return NULL;
+
+ close(ipc_socket);
+
+ sys_execveat(AT_FDCWD, "pidfd_exec_helper", NULL, NULL, 0);
+ return NULL;
+}
+
+TEST_F(pidfd_info, thread_group_exec_thread)
+{
+ pid_t pid_leader, pid_poller, pid_thread;
+ pthread_t thread;
+ int nevents, pidfd_leader, pidfd_leader_thread, pidfd_thread, ret;
+ int ipc_sockets[2];
+ struct pollfd fds = {};
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT,
+ };
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ pid_leader = create_child(&pidfd_leader, 0);
+ EXPECT_GE(pid_leader, 0);
+
+ if (pid_leader == 0) {
+ close(ipc_sockets[0]);
+
+ /* The thread will outlive the thread-group leader. */
+ if (pthread_create(&thread, NULL, pidfd_info_thread_exec_sane, &ipc_sockets[1]))
+ syscall(__NR_exit, EXIT_FAILURE);
+
+ /*
+ * Pause the thread-group leader. It will be killed once
+ * the subthread execs.
+ */
+ pause();
+ syscall(__NR_exit, EXIT_SUCCESS);
+ }
+
+ /* Retrieve the tid of the thread. */
+ EXPECT_EQ(close(ipc_sockets[1]), 0);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+
+ /* Opening a thread as a PIDFD_THREAD must succeed. */
+ pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD);
+ ASSERT_GE(pidfd_thread, 0);
+
+ /* Open a thread-specific pidfd for the thread-group leader. */
+ pidfd_leader_thread = sys_pidfd_open(pid_leader, PIDFD_THREAD);
+ ASSERT_GE(pidfd_leader_thread, 0);
+
+ pid_poller = fork();
+ ASSERT_GE(pid_poller, 0);
+ if (pid_poller == 0) {
+ /*
+ * The subthread will now exec. The struct pid of the old
+ * thread-group leader will be assumed by the subthread which
+ * becomes the new thread-group leader. So no exit notification
+ * must be generated. Wait for 5 seconds and call it a success
+ * if no notification has been received.
+ */
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader_thread;
+ nevents = poll(&fds, 1, 10000 /* wait 5 seconds */);
+ if (nevents != 0)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLIN)
+ _exit(EXIT_FAILURE);
+ if (fds.revents & POLLHUP)
+ _exit(EXIT_FAILURE);
+ _exit(EXIT_SUCCESS);
+ }
+
+ /* Now that we've opened a thread-specific pidfd the thread can exec. */
+ ASSERT_EQ(write_nointr(ipc_sockets[0], &pid_thread, sizeof(pid_thread)), sizeof(pid_thread));
+ EXPECT_EQ(close(ipc_sockets[0]), 0);
+ ASSERT_EQ(wait_for_pid(pid_poller), 0);
+
+ /* Wait until the kernel has SIGKILLed the thread. */
+ fds.events = POLLHUP;
+ fds.fd = pidfd_thread;
+ nevents = poll(&fds, 1, -1);
+ ASSERT_EQ(nevents, 1);
+ /* The thread has been reaped. */
+ ASSERT_TRUE(!!(fds.revents & POLLHUP));
+
+ /* Retrieve thread-specific exit info from pidfd. */
+ ASSERT_EQ(ioctl(pidfd_thread, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+ /*
+ * While the kernel will have SIGKILLed the whole thread-group
+ * during exec it will cause the individual threads to exit
+ * cleanly.
+ */
+ ASSERT_TRUE(WIFEXITED(info.exit_code));
+ ASSERT_EQ(WEXITSTATUS(info.exit_code), 0);
+
+ /*
+ * The thread-group leader is still alive, the thread has taken
+ * over its struct pid and thus its pid number.
+ */
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_EXIT));
+ ASSERT_EQ(info.pid, pid_leader);
+
+ /* Take down the thread-group leader. */
+ EXPECT_EQ(sys_pidfd_send_signal(pidfd_leader, SIGKILL, NULL, 0), 0);
+
+ /*
+ * Afte the exec we're dealing with an empty thread-group so now
+ * we must see an exit notification on the thread-specific pidfd
+ * for the thread-group leader as there's no subthread that can
+ * revive the struct pid.
+ */
+ fds.events = POLLIN;
+ fds.fd = pidfd_leader_thread;
+ nevents = poll(&fds, 1, -1);
+ ASSERT_EQ(nevents, 1);
+ ASSERT_TRUE(!!(fds.revents & POLLIN));
+ ASSERT_FALSE(!!(fds.revents & POLLHUP));
+
+ EXPECT_EQ(sys_waitid(P_PIDFD, pidfd_leader, NULL, WEXITED), 0);
+
+ /* Retrieve exit information for the thread-group leader. */
+ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT;
+ ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0);
+ ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS));
+ ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT));
+
+ EXPECT_EQ(close(pidfd_leader), 0);
+ EXPECT_EQ(close(pidfd_thread), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c
index ce413a221bac..cd3de40e4977 100644
--- a/tools/testing/selftests/pidfd/pidfd_open_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
@@ -22,32 +22,6 @@
#include "pidfd.h"
#include "../kselftest.h"
-#ifndef PIDFS_IOCTL_MAGIC
-#define PIDFS_IOCTL_MAGIC 0xFF
-#endif
-
-#ifndef PIDFD_GET_INFO
-#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
-#define PIDFD_INFO_CGROUPID (1UL << 0)
-
-struct pidfd_info {
- __u64 request_mask;
- __u64 cgroupid;
- __u32 pid;
- __u32 tgid;
- __u32 ppid;
- __u32 ruid;
- __u32 rgid;
- __u32 euid;
- __u32 egid;
- __u32 suid;
- __u32 sgid;
- __u32 fsuid;
- __u32 fsgid;
- __u32 spare0[1];
-};
-#endif
-
static int safe_int(const char *numstr, int *converted)
{
char *err = NULL;
@@ -148,7 +122,7 @@ out:
int main(int argc, char **argv)
{
struct pidfd_info info = {
- .request_mask = PIDFD_INFO_CGROUPID,
+ .mask = PIDFD_INFO_CGROUPID,
};
int pidfd = -1, ret = 1;
pid_t pid;
@@ -227,7 +201,7 @@ int main(int argc, char **argv)
getegid(), info.sgid);
goto on_error;
}
- if ((info.request_mask & PIDFD_INFO_CGROUPID) && info.cgroupid == 0) {
+ if ((info.mask & PIDFD_INFO_CGROUPID) && info.cgroupid == 0) {
ksft_print_msg("cgroupid should not be 0 when PIDFD_INFO_CGROUPID is set\n");
goto on_error;
}
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
index 222f8131283b..e6a079b3d5e2 100644
--- a/tools/testing/selftests/pidfd/pidfd_setns_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -16,55 +16,10 @@
#include <unistd.h>
#include <sys/socket.h>
#include <sys/stat.h>
-#include <linux/ioctl.h>
#include "pidfd.h"
#include "../kselftest_harness.h"
-#ifndef PIDFS_IOCTL_MAGIC
-#define PIDFS_IOCTL_MAGIC 0xFF
-#endif
-
-#ifndef PIDFD_GET_CGROUP_NAMESPACE
-#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1)
-#endif
-
-#ifndef PIDFD_GET_IPC_NAMESPACE
-#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2)
-#endif
-
-#ifndef PIDFD_GET_MNT_NAMESPACE
-#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3)
-#endif
-
-#ifndef PIDFD_GET_NET_NAMESPACE
-#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4)
-#endif
-
-#ifndef PIDFD_GET_PID_NAMESPACE
-#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5)
-#endif
-
-#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE
-#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6)
-#endif
-
-#ifndef PIDFD_GET_TIME_NAMESPACE
-#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7)
-#endif
-
-#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE
-#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
-#endif
-
-#ifndef PIDFD_GET_USER_NAMESPACE
-#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
-#endif
-
-#ifndef PIDFD_GET_UTS_NAMESPACE
-#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
-#endif
-
enum {
PIDFD_NS_USER,
PIDFD_NS_MNT,
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index e9728e86b4f2..fcd85cad9f18 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -42,12 +42,41 @@ static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *))
#endif
}
-static int signal_received;
+static pthread_t signal_received;
static void set_signal_received_on_sigusr1(int sig)
{
if (sig == SIGUSR1)
- signal_received = 1;
+ signal_received = pthread_self();
+}
+
+static int send_signal(int pidfd)
+{
+ int ret = 0;
+
+ if (sys_pidfd_send_signal(pidfd, SIGUSR1, NULL, 0) < 0) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (signal_received != pthread_self()) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+exit:
+ signal_received = 0;
+ return ret;
+}
+
+static void *send_signal_worker(void *arg)
+{
+ int pidfd = (int)(intptr_t)arg;
+ int ret;
+
+ /* We forward any errors for the caller to handle. */
+ ret = send_signal(pidfd);
+ return (void *)(intptr_t)ret;
}
/*
@@ -56,8 +85,11 @@ static void set_signal_received_on_sigusr1(int sig)
*/
static int test_pidfd_send_signal_simple_success(void)
{
- int pidfd, ret;
+ int pidfd;
const char *test_name = "pidfd_send_signal send SIGUSR1";
+ pthread_t thread;
+ void *thread_res;
+ int err;
if (!have_pidfd_send_signal) {
ksft_test_result_skip(
@@ -66,25 +98,45 @@ static int test_pidfd_send_signal_simple_success(void)
return 0;
}
+ signal(SIGUSR1, set_signal_received_on_sigusr1);
+
+ /* Try sending a signal to ourselves via /proc/self. */
pidfd = open("/proc/self", O_DIRECTORY | O_CLOEXEC);
if (pidfd < 0)
ksft_exit_fail_msg(
"%s test: Failed to open process file descriptor\n",
test_name);
+ err = send_signal(pidfd);
+ if (err)
+ ksft_exit_fail_msg(
+ "%s test: Error %d on sending pidfd signal\n",
+ test_name, err);
+ close(pidfd);
- signal(SIGUSR1, set_signal_received_on_sigusr1);
+ /* Now try the same thing only using PIDFD_SELF_THREAD_GROUP. */
+ err = send_signal(PIDFD_SELF_THREAD_GROUP);
+ if (err)
+ ksft_exit_fail_msg(
+ "%s test: Error %d on PIDFD_SELF_THREAD_GROUP signal\n",
+ test_name, err);
- ret = sys_pidfd_send_signal(pidfd, SIGUSR1, NULL, 0);
- close(pidfd);
- if (ret < 0)
- ksft_exit_fail_msg("%s test: Failed to send signal\n",
+ /*
+ * Now try the same thing in a thread and assert thread ID is equal to
+ * worker thread ID.
+ */
+ if (pthread_create(&thread, NULL, send_signal_worker,
+ (void *)(intptr_t)PIDFD_SELF_THREAD))
+ ksft_exit_fail_msg("%s test: Failed to create thread\n",
test_name);
-
- if (signal_received != 1)
- ksft_exit_fail_msg("%s test: Failed to receive signal\n",
+ if (pthread_join(thread, &thread_res))
+ ksft_exit_fail_msg("%s test: Failed to join thread\n",
test_name);
+ err = (int)(intptr_t)thread_res;
+ if (err)
+ ksft_exit_fail_msg(
+ "%s test: Error %d on PIDFD_SELF_THREAD signal\n",
+ test_name, err);
- signal_received = 0;
ksft_test_result_pass("%s test: Sent signal\n", test_name);
return 0;
}
diff --git a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
index 2e63ef009d59..2db12c5cad9c 100755
--- a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
+++ b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
@@ -49,7 +49,7 @@ do
do
err=
val=$((d*1000+t*10+c))
- tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --bootargs "rcutorture.test_srcu_lockdep=$val" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1
+ tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.test_srcu_lockdep=$val rcutorture.reader_flavor=0x2" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1
ret=$?
mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val"
if test "$d" -ne 0 && test "$ret" -eq 0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
index 2db39f298d18..fb61703690cb 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -2,3 +2,4 @@ rcutorture.torture_type=srcud
rcupdate.rcu_self_test=1
rcutorture.fwd_progress=3
srcutree.big_cpu_lim=5
+rcutorture.reader_flavor=0x8
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
index c419cac233ee..54f5c9053474 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
@@ -2,3 +2,9 @@ rcutree.gp_preinit_delay=3
rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
rcupdate.rcu_self_test=1
+
+# This part is for synchronize_rcu() testing
+rcutorture.nfakewriters=-1
+rcutorture.gp_sync=1
+rcupdate.rcu_normal=1
+rcutree.rcu_normal_wake_from_gp=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index d30922d8c883..352393bc5c56 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -1,7 +1,8 @@
CONFIG_SMP=y
CONFIG_NR_CPUS=16
-CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT_LAZY=y
CONFIG_PREEMPT=n
CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TREE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
index 759ee51d3ddc..420632b030dc 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
@@ -1,6 +1,7 @@
CONFIG_SMP=y
CONFIG_NR_CPUS=74
-CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_LAZY=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
CONFIG_PREEMPT_DYNAMIC=n
diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
index 16496de5f6ce..0fda241fa62b 100644
--- a/tools/testing/selftests/rseq/.gitignore
+++ b/tools/testing/selftests/rseq/.gitignore
@@ -9,3 +9,4 @@ param_test_compare_twice
param_test_mm_cid
param_test_mm_cid_benchmark
param_test_mm_cid_compare_twice
+syscall_errors_test
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index 5a3432fceb58..0d0a5fae5954 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -16,11 +16,12 @@ OVERRIDE_TARGETS = 1
TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \
param_test_benchmark param_test_compare_twice param_test_mm_cid \
- param_test_mm_cid_benchmark param_test_mm_cid_compare_twice
+ param_test_mm_cid_benchmark param_test_mm_cid_compare_twice \
+ syscall_errors_test
TEST_GEN_PROGS_EXTENDED = librseq.so
-TEST_PROGS = run_param_test.sh
+TEST_PROGS = run_param_test.sh run_syscall_errors_test.sh
TEST_FILES := settings
@@ -54,3 +55,7 @@ $(OUTPUT)/param_test_mm_cid_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
$(OUTPUT)/param_test_mm_cid_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
rseq.h rseq-*.h
$(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@
+
+$(OUTPUT)/syscall_errors_test: syscall_errors_test.c $(TEST_GEN_PROGS_EXTENDED) \
+ rseq.h rseq-*.h
+ $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@
diff --git a/tools/testing/selftests/rseq/rseq-riscv-bits.h b/tools/testing/selftests/rseq/rseq-riscv-bits.h
index de31a0143139..f02f411d550d 100644
--- a/tools/testing/selftests/rseq/rseq-riscv-bits.h
+++ b/tools/testing/selftests/rseq/rseq-riscv-bits.h
@@ -243,7 +243,7 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, i
#ifdef RSEQ_COMPARE_TWICE
RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
#endif
- RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3)
+ RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, 3)
RSEQ_INJECT_ASM(4)
RSEQ_ASM_DEFINE_ABORT(4, abort)
: /* gcc asm goto does not allow outputs */
@@ -251,8 +251,8 @@ int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, i
[current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
[rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
[ptr] "r" (ptr),
- [off] "er" (off),
- [inc] "er" (inc)
+ [off] "r" (off),
+ [inc] "r" (inc)
RSEQ_INJECT_INPUT
: "memory", RSEQ_ASM_TMP_REG_1
RSEQ_INJECT_CLOBBER
diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h
index 37e598d0a365..67d544aaa9a3 100644
--- a/tools/testing/selftests/rseq/rseq-riscv.h
+++ b/tools/testing/selftests/rseq/rseq-riscv.h
@@ -158,7 +158,7 @@ do { \
"bnez " RSEQ_ASM_TMP_REG_1 ", 222b\n" \
"333:\n"
-#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, post_commit_label) \
+#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, inc, post_commit_label) \
"mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "]\n" \
RSEQ_ASM_OP_R_ADD(off) \
REG_L RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n" \
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index f6156790c3b4..663a9cef1952 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -71,9 +71,20 @@ static int rseq_ownership;
/* Original struct rseq allocation size is 32 bytes. */
#define ORIG_RSEQ_ALLOC_SIZE 32
+/*
+ * Use a union to ensure we allocate a TLS area of 1024 bytes to accomodate an
+ * rseq registration that is larger than the current rseq ABI.
+ */
+union rseq_tls {
+ struct rseq_abi abi;
+ char dummy[RSEQ_THREAD_AREA_ALLOC_SIZE];
+};
+
static
-__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
- .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
+__thread union rseq_tls __rseq __attribute__((tls_model("initial-exec"))) = {
+ .abi = {
+ .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
+ },
};
static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
@@ -87,7 +98,7 @@ static int sys_getcpu(unsigned *cpu, unsigned *node)
return syscall(__NR_getcpu, cpu, node, NULL);
}
-int rseq_available(void)
+bool rseq_available(void)
{
int rc;
@@ -96,9 +107,9 @@ int rseq_available(void)
abort();
switch (errno) {
case ENOSYS:
- return 0;
+ return false;
case EINVAL:
- return 1;
+ return true;
default:
abort();
}
@@ -149,7 +160,7 @@ int rseq_register_current_thread(void)
/* Treat libc's ownership as a successful registration. */
return 0;
}
- rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG);
+ rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG);
if (rc) {
/*
* After at least one thread has registered successfully
@@ -183,7 +194,7 @@ int rseq_unregister_current_thread(void)
/* Treat libc's ownership as a successful unregistration. */
return 0;
}
- rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+ rc = sys_rseq(&__rseq.abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
if (rc)
return -1;
return 0;
@@ -249,7 +260,7 @@ void rseq_init(void)
rseq_ownership = 1;
/* Calculate the offset of the rseq area from the thread pointer. */
- rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
+ rseq_offset = (void *)&__rseq.abi - rseq_thread_pointer();
/* rseq flags are deprecated, always set to 0. */
rseq_flags = 0;
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index ba424ce80a71..f51a5fdb0444 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -160,6 +160,11 @@ int32_t rseq_fallback_current_cpu(void);
int32_t rseq_fallback_current_node(void);
/*
+ * Returns true if rseq is supported.
+ */
+bool rseq_available(void);
+
+/*
* Values returned can be either the current CPU number, -1 (rseq is
* uninitialized), or -2 (rseq initialization has failed).
*/
diff --git a/tools/testing/selftests/rseq/run_syscall_errors_test.sh b/tools/testing/selftests/rseq/run_syscall_errors_test.sh
new file mode 100755
index 000000000000..9272246b39f2
--- /dev/null
+++ b/tools/testing/selftests/rseq/run_syscall_errors_test.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: MIT
+# SPDX-FileCopyrightText: 2024 Michael Jeanson <mjeanson@efficios.com>
+
+GLIBC_TUNABLES="${GLIBC_TUNABLES:-}:glibc.pthread.rseq=0" ./syscall_errors_test
diff --git a/tools/testing/selftests/rseq/syscall_errors_test.c b/tools/testing/selftests/rseq/syscall_errors_test.c
new file mode 100644
index 000000000000..a5d9e1f8a2dc
--- /dev/null
+++ b/tools/testing/selftests/rseq/syscall_errors_test.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2024 Michael Jeanson <mjeanson@efficios.com>
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <assert.h>
+#include <stdint.h>
+#include <syscall.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "rseq.h"
+
+static int sys_rseq(void *rseq_abi, uint32_t rseq_len,
+ int flags, uint32_t sig)
+{
+ return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
+}
+
+/*
+ * Check the value of errno on some expected failures of the rseq syscall.
+ */
+
+int main(void)
+{
+ struct rseq_abi *global_rseq = rseq_get_abi();
+ int ret;
+ int errno_copy;
+
+ if (!rseq_available()) {
+ fprintf(stderr, "rseq syscall unavailable");
+ goto error;
+ }
+
+ /* The current thread is NOT registered. */
+
+ /* EINVAL */
+ errno = 0;
+ ret = sys_rseq(global_rseq, 32, -1, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Registration with invalid flag fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret == 0 || errno_copy != EINVAL)
+ goto error;
+
+ errno = 0;
+ ret = sys_rseq((char *) global_rseq + 1, 32, 0, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Registration with unaligned rseq_abi fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret == 0 || errno_copy != EINVAL)
+ goto error;
+
+ errno = 0;
+ ret = sys_rseq(global_rseq, 31, 0, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Registration with invalid size fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret == 0 || errno_copy != EINVAL)
+ goto error;
+
+
+#if defined(__LP64__) && (!defined(__s390__) && !defined(__s390x__))
+ /*
+ * We haven't found a reliable way to find an invalid address when
+ * running a 32bit userspace on a 64bit kernel, so only run this test
+ * on 64bit builds for the moment.
+ *
+ * Also exclude architectures that select
+ * CONFIG_ALTERNATE_USER_ADDRESS_SPACE where the kernel and userspace
+ * have their own address space and this failure can't happen.
+ */
+
+ /* EFAULT */
+ errno = 0;
+ ret = sys_rseq((void *) -4096UL, 32, 0, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Registration with invalid address fails with errno set to EFAULT (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret == 0 || errno_copy != EFAULT)
+ goto error;
+#endif
+
+ errno = 0;
+ ret = sys_rseq(global_rseq, 32, 0, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Registration succeeds for the current thread (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret != 0 && errno != 0)
+ goto error;
+
+ /* The current thread is registered. */
+
+ /* EBUSY */
+ errno = 0;
+ ret = sys_rseq(global_rseq, 32, 0, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Double registration fails with errno set to EBUSY (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret == 0 || errno_copy != EBUSY)
+ goto error;
+
+ /* EPERM */
+ errno = 0;
+ ret = sys_rseq(global_rseq, 32, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG + 1);
+ errno_copy = errno;
+ fprintf(stderr, "Unregistration with wrong RSEQ_SIG fails with errno to EPERM (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret == 0 || errno_copy != EPERM)
+ goto error;
+
+ errno = 0;
+ ret = sys_rseq(global_rseq, 32, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Unregistration succeeds for the current thread (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret != 0)
+ goto error;
+
+ errno = 0;
+ ret = sys_rseq(global_rseq, 32, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+ errno_copy = errno;
+ fprintf(stderr, "Double unregistration fails with errno set to EINVAL (ret = %d, errno = %s)\n", ret, strerrorname_np(errno_copy));
+ if (ret == 0 || errno_copy != EINVAL)
+ goto error;
+
+ return 0;
+error:
+ return -1;
+}
diff --git a/tools/testing/selftests/sched/config b/tools/testing/selftests/sched/config
index e8b09aa7c0c4..1bb8bf6d7fd4 100644
--- a/tools/testing/selftests/sched/config
+++ b/tools/testing/selftests/sched/config
@@ -1 +1 @@
-CONFIG_SCHED_DEBUG=y
+# empty
diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile
index 011762224600..f4531327b8e7 100644
--- a/tools/testing/selftests/sched_ext/Makefile
+++ b/tools/testing/selftests/sched_ext/Makefile
@@ -172,6 +172,7 @@ auto-test-targets := \
maximal \
maybe_null \
minimal \
+ numa \
prog_run \
reload_loop \
select_cpu_dfl \
diff --git a/tools/testing/selftests/sched_ext/config b/tools/testing/selftests/sched_ext/config
index 0de9b4ee249d..aa901b05c8ad 100644
--- a/tools/testing/selftests/sched_ext/config
+++ b/tools/testing/selftests/sched_ext/config
@@ -1,4 +1,3 @@
-CONFIG_SCHED_DEBUG=y
CONFIG_SCHED_CLASS_EXT=y
CONFIG_CGROUPS=y
CONFIG_CGROUP_SCHED=y
diff --git a/tools/testing/selftests/sched_ext/create_dsq.c b/tools/testing/selftests/sched_ext/create_dsq.c
index fa946d9146d4..d67431f57ac6 100644
--- a/tools/testing/selftests/sched_ext/create_dsq.c
+++ b/tools/testing/selftests/sched_ext/create_dsq.c
@@ -14,11 +14,11 @@ static enum scx_test_status setup(void **ctx)
{
struct create_dsq *skel;
- skel = create_dsq__open_and_load();
- if (!skel) {
- SCX_ERR("Failed to open and load skel");
- return SCX_TEST_FAIL;
- }
+ skel = create_dsq__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(create_dsq__load(skel), "Failed to load skel");
+
*ctx = skel;
return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c
index e65d22f23f3b..b6d13496b24e 100644
--- a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c
+++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.c
@@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx)
{
struct ddsp_bogus_dsq_fail *skel;
- skel = ddsp_bogus_dsq_fail__open_and_load();
- SCX_FAIL_IF(!skel, "Failed to open and load skel");
+ skel = ddsp_bogus_dsq_fail__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(ddsp_bogus_dsq_fail__load(skel), "Failed to load skel");
+
*ctx = skel;
return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c
index abafee587cd6..af9ce4ee8baa 100644
--- a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c
+++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.c
@@ -14,8 +14,11 @@ static enum scx_test_status setup(void **ctx)
{
struct ddsp_vtimelocal_fail *skel;
- skel = ddsp_vtimelocal_fail__open_and_load();
- SCX_FAIL_IF(!skel, "Failed to open and load skel");
+ skel = ddsp_vtimelocal_fail__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(ddsp_vtimelocal_fail__load(skel), "Failed to load skel");
+
*ctx = skel;
return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
index fbda6bf54671..c02b2aa6fc64 100644
--- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
+++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
@@ -43,7 +43,7 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev)
if (!p)
return;
- if (p->nr_cpus_allowed == nr_cpus)
+ if (p->nr_cpus_allowed == nr_cpus && !is_migration_disabled(p))
target = bpf_get_prandom_u32() % nr_cpus;
else
target = scx_bpf_task_cpu(p);
diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.c b/tools/testing/selftests/sched_ext/dsp_local_on.c
index 0ff27e57fe43..e1f2ce4abfe6 100644
--- a/tools/testing/selftests/sched_ext/dsp_local_on.c
+++ b/tools/testing/selftests/sched_ext/dsp_local_on.c
@@ -15,6 +15,7 @@ static enum scx_test_status setup(void **ctx)
skel = dsp_local_on__open();
SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
skel->rodata->nr_cpus = libbpf_num_possible_cpus();
SCX_FAIL_IF(dsp_local_on__load(skel), "Failed to load skel");
diff --git a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c
index 73e679953e27..d3387ae03679 100644
--- a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c
+++ b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c
@@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx)
{
struct enq_last_no_enq_fails *skel;
- skel = enq_last_no_enq_fails__open_and_load();
- if (!skel) {
- SCX_ERR("Failed to open and load skel");
- return SCX_TEST_FAIL;
- }
+ skel = enq_last_no_enq_fails__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(enq_last_no_enq_fails__load(skel), "Failed to load skel");
+
*ctx = skel;
return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c
index dd1350e5f002..a80e3a3b3698 100644
--- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c
+++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.c
@@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx)
{
struct enq_select_cpu_fails *skel;
- skel = enq_select_cpu_fails__open_and_load();
- if (!skel) {
- SCX_ERR("Failed to open and load skel");
- return SCX_TEST_FAIL;
- }
+ skel = enq_select_cpu_fails__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(enq_select_cpu_fails__load(skel), "Failed to load skel");
+
*ctx = skel;
return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c
index 31bcd06e21cd..9451782689de 100644
--- a/tools/testing/selftests/sched_ext/exit.c
+++ b/tools/testing/selftests/sched_ext/exit.c
@@ -23,6 +23,7 @@ static enum scx_test_status run(void *ctx)
char buf[16];
skel = exit__open();
+ SCX_ENUM_INIT(skel);
skel->rodata->exit_point = tc;
exit__load(skel);
link = bpf_map__attach_struct_ops(skel->maps.exit_ops);
diff --git a/tools/testing/selftests/sched_ext/hotplug.c b/tools/testing/selftests/sched_ext/hotplug.c
index 87bf220b1bce..1c9ceb661c43 100644
--- a/tools/testing/selftests/sched_ext/hotplug.c
+++ b/tools/testing/selftests/sched_ext/hotplug.c
@@ -49,8 +49,10 @@ static enum scx_test_status test_hotplug(bool onlining, bool cbs_defined)
SCX_ASSERT(is_cpu_online());
- skel = hotplug__open_and_load();
- SCX_ASSERT(skel);
+ skel = hotplug__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(hotplug__load(skel), "Failed to load skel");
/* Testing the offline -> online path, so go offline before starting */
if (onlining)
diff --git a/tools/testing/selftests/sched_ext/init_enable_count.c b/tools/testing/selftests/sched_ext/init_enable_count.c
index 97d45f1e5597..eddf9e0e26e7 100644
--- a/tools/testing/selftests/sched_ext/init_enable_count.c
+++ b/tools/testing/selftests/sched_ext/init_enable_count.c
@@ -15,22 +15,6 @@
#define SCHED_EXT 7
-static struct init_enable_count *
-open_load_prog(bool global)
-{
- struct init_enable_count *skel;
-
- skel = init_enable_count__open();
- SCX_BUG_ON(!skel, "Failed to open skel");
-
- if (!global)
- skel->struct_ops.init_enable_count_ops->flags |= SCX_OPS_SWITCH_PARTIAL;
-
- SCX_BUG_ON(init_enable_count__load(skel), "Failed to load skel");
-
- return skel;
-}
-
static enum scx_test_status run_test(bool global)
{
struct init_enable_count *skel;
@@ -40,7 +24,14 @@ static enum scx_test_status run_test(bool global)
struct sched_param param = {};
pid_t pids[num_pre_forks];
- skel = open_load_prog(global);
+ skel = init_enable_count__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+
+ if (!global)
+ skel->struct_ops.init_enable_count_ops->flags |= SCX_OPS_SWITCH_PARTIAL;
+
+ SCX_FAIL_IF(init_enable_count__load(skel), "Failed to load skel");
/*
* Fork a bunch of children before we attach the scheduler so that we
@@ -159,7 +150,7 @@ static enum scx_test_status run(void *ctx)
struct scx_test init_enable_count = {
.name = "init_enable_count",
- .description = "Verify we do the correct amount of counting of init, "
+ .description = "Verify we correctly count the occurrences of init, "
"enable, etc callbacks.",
.run = run,
};
diff --git a/tools/testing/selftests/sched_ext/maximal.c b/tools/testing/selftests/sched_ext/maximal.c
index f38fc973c380..c6be50a9941d 100644
--- a/tools/testing/selftests/sched_ext/maximal.c
+++ b/tools/testing/selftests/sched_ext/maximal.c
@@ -14,8 +14,11 @@ static enum scx_test_status setup(void **ctx)
{
struct maximal *skel;
- skel = maximal__open_and_load();
- SCX_FAIL_IF(!skel, "Failed to open and load skel");
+ skel = maximal__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(maximal__load(skel), "Failed to load skel");
+
*ctx = skel;
return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/maybe_null.c b/tools/testing/selftests/sched_ext/maybe_null.c
index 31cfafb0cf65..aacf0c58ca4f 100644
--- a/tools/testing/selftests/sched_ext/maybe_null.c
+++ b/tools/testing/selftests/sched_ext/maybe_null.c
@@ -43,7 +43,7 @@ static enum scx_test_status run(void *ctx)
struct scx_test maybe_null = {
.name = "maybe_null",
- .description = "Verify if PTR_MAYBE_NULL work for .dispatch",
+ .description = "Verify if PTR_MAYBE_NULL works for .dispatch",
.run = run,
};
REGISTER_SCX_TEST(&maybe_null)
diff --git a/tools/testing/selftests/sched_ext/minimal.c b/tools/testing/selftests/sched_ext/minimal.c
index 6c5db8ebbf8a..89f7261757ff 100644
--- a/tools/testing/selftests/sched_ext/minimal.c
+++ b/tools/testing/selftests/sched_ext/minimal.c
@@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx)
{
struct minimal *skel;
- skel = minimal__open_and_load();
- if (!skel) {
- SCX_ERR("Failed to open and load skel");
- return SCX_TEST_FAIL;
- }
+ skel = minimal__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(minimal__load(skel), "Failed to load skel");
+
*ctx = skel;
return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/numa.bpf.c b/tools/testing/selftests/sched_ext/numa.bpf.c
new file mode 100644
index 000000000000..a79d86ed54a1
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/numa.bpf.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A scheduler that validates the behavior of the NUMA-aware
+ * functionalities.
+ *
+ * The scheduler creates a separate DSQ for each NUMA node, ensuring tasks
+ * are exclusively processed by CPUs within their respective nodes. Idle
+ * CPUs are selected only within the same node, so task migration can only
+ * occurs between CPUs belonging to the same node.
+ *
+ * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
+ */
+
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+UEI_DEFINE(uei);
+
+const volatile unsigned int __COMPAT_SCX_PICK_IDLE_IN_NODE;
+
+static bool is_cpu_idle(s32 cpu, int node)
+{
+ const struct cpumask *idle_cpumask;
+ bool idle;
+
+ idle_cpumask = __COMPAT_scx_bpf_get_idle_cpumask_node(node);
+ idle = bpf_cpumask_test_cpu(cpu, idle_cpumask);
+ scx_bpf_put_cpumask(idle_cpumask);
+
+ return idle;
+}
+
+s32 BPF_STRUCT_OPS(numa_select_cpu,
+ struct task_struct *p, s32 prev_cpu, u64 wake_flags)
+{
+ int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));
+ s32 cpu;
+
+ /*
+ * We could just use __COMPAT_scx_bpf_pick_any_cpu_node() here,
+ * since it already tries to pick an idle CPU within the node
+ * first, but let's use both functions for better testing coverage.
+ */
+ cpu = __COMPAT_scx_bpf_pick_idle_cpu_node(p->cpus_ptr, node,
+ __COMPAT_SCX_PICK_IDLE_IN_NODE);
+ if (cpu < 0)
+ cpu = __COMPAT_scx_bpf_pick_any_cpu_node(p->cpus_ptr, node,
+ __COMPAT_SCX_PICK_IDLE_IN_NODE);
+
+ if (is_cpu_idle(cpu, node))
+ scx_bpf_error("CPU %d should be marked as busy", cpu);
+
+ if (__COMPAT_scx_bpf_cpu_node(cpu) != node)
+ scx_bpf_error("CPU %d should be in node %d", cpu, node);
+
+ return cpu;
+}
+
+void BPF_STRUCT_OPS(numa_enqueue, struct task_struct *p, u64 enq_flags)
+{
+ int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));
+
+ scx_bpf_dsq_insert(p, node, SCX_SLICE_DFL, enq_flags);
+}
+
+void BPF_STRUCT_OPS(numa_dispatch, s32 cpu, struct task_struct *prev)
+{
+ int node = __COMPAT_scx_bpf_cpu_node(cpu);
+
+ scx_bpf_dsq_move_to_local(node);
+}
+
+s32 BPF_STRUCT_OPS_SLEEPABLE(numa_init)
+{
+ int node, err;
+
+ bpf_for(node, 0, __COMPAT_scx_bpf_nr_node_ids()) {
+ err = scx_bpf_create_dsq(node, node);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+void BPF_STRUCT_OPS(numa_exit, struct scx_exit_info *ei)
+{
+ UEI_RECORD(uei, ei);
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops numa_ops = {
+ .select_cpu = (void *)numa_select_cpu,
+ .enqueue = (void *)numa_enqueue,
+ .dispatch = (void *)numa_dispatch,
+ .init = (void *)numa_init,
+ .exit = (void *)numa_exit,
+ .name = "numa",
+};
diff --git a/tools/testing/selftests/sched_ext/numa.c b/tools/testing/selftests/sched_ext/numa.c
new file mode 100644
index 000000000000..b060c3b65c82
--- /dev/null
+++ b/tools/testing/selftests/sched_ext/numa.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
+ */
+#include <bpf/bpf.h>
+#include <scx/common.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include "numa.bpf.skel.h"
+#include "scx_test.h"
+
+static enum scx_test_status setup(void **ctx)
+{
+ struct numa *skel;
+
+ skel = numa__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ skel->rodata->__COMPAT_SCX_PICK_IDLE_IN_NODE = SCX_PICK_IDLE_IN_NODE;
+ skel->struct_ops.numa_ops->flags = SCX_OPS_BUILTIN_IDLE_PER_NODE;
+ SCX_FAIL_IF(numa__load(skel), "Failed to load skel");
+
+ *ctx = skel;
+
+ return SCX_TEST_PASS;
+}
+
+static enum scx_test_status run(void *ctx)
+{
+ struct numa *skel = ctx;
+ struct bpf_link *link;
+
+ link = bpf_map__attach_struct_ops(skel->maps.numa_ops);
+ SCX_FAIL_IF(!link, "Failed to attach scheduler");
+
+ /* Just sleeping is fine, plenty of scheduling events happening */
+ sleep(1);
+
+ SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_NONE));
+ bpf_link__destroy(link);
+
+ return SCX_TEST_PASS;
+}
+
+static void cleanup(void *ctx)
+{
+ struct numa *skel = ctx;
+
+ numa__destroy(skel);
+}
+
+struct scx_test numa = {
+ .name = "numa",
+ .description = "Verify NUMA-aware functionalities",
+ .setup = setup,
+ .run = run,
+ .cleanup = cleanup,
+};
+REGISTER_SCX_TEST(&numa)
diff --git a/tools/testing/selftests/sched_ext/prog_run.c b/tools/testing/selftests/sched_ext/prog_run.c
index 3cd57ef8daaa..05974820ca69 100644
--- a/tools/testing/selftests/sched_ext/prog_run.c
+++ b/tools/testing/selftests/sched_ext/prog_run.c
@@ -15,11 +15,11 @@ static enum scx_test_status setup(void **ctx)
{
struct prog_run *skel;
- skel = prog_run__open_and_load();
- if (!skel) {
- SCX_ERR("Failed to open and load skel");
- return SCX_TEST_FAIL;
- }
+ skel = prog_run__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(prog_run__load(skel), "Failed to load skel");
+
*ctx = skel;
return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/reload_loop.c b/tools/testing/selftests/sched_ext/reload_loop.c
index 5cfba2d6e056..308211d80436 100644
--- a/tools/testing/selftests/sched_ext/reload_loop.c
+++ b/tools/testing/selftests/sched_ext/reload_loop.c
@@ -18,11 +18,10 @@ bool force_exit = false;
static enum scx_test_status setup(void **ctx)
{
- skel = maximal__open_and_load();
- if (!skel) {
- SCX_ERR("Failed to open and load skel");
- return SCX_TEST_FAIL;
- }
+ skel = maximal__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(maximal__load(skel), "Failed to load skel");
return SCX_TEST_PASS;
}
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.c
index a53a40c2d2f0..5b6e045e1109 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dfl.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.c
@@ -17,8 +17,11 @@ static enum scx_test_status setup(void **ctx)
{
struct select_cpu_dfl *skel;
- skel = select_cpu_dfl__open_and_load();
- SCX_FAIL_IF(!skel, "Failed to open and load skel");
+ skel = select_cpu_dfl__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(select_cpu_dfl__load(skel), "Failed to load skel");
+
*ctx = skel;
return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c
index 1d85bf4bf3a3..9b5d232efb7f 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.c
@@ -17,8 +17,11 @@ static enum scx_test_status setup(void **ctx)
{
struct select_cpu_dfl_nodispatch *skel;
- skel = select_cpu_dfl_nodispatch__open_and_load();
- SCX_FAIL_IF(!skel, "Failed to open and load skel");
+ skel = select_cpu_dfl_nodispatch__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(select_cpu_dfl_nodispatch__load(skel), "Failed to load skel");
+
*ctx = skel;
return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.c
index 0309ca8785b3..80283dbc41b7 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dispatch.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.c
@@ -17,8 +17,11 @@ static enum scx_test_status setup(void **ctx)
{
struct select_cpu_dispatch *skel;
- skel = select_cpu_dispatch__open_and_load();
- SCX_FAIL_IF(!skel, "Failed to open and load skel");
+ skel = select_cpu_dispatch__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(select_cpu_dispatch__load(skel), "Failed to load skel");
+
*ctx = skel;
return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c
index 47eb6ed7627d..5e72ebbc90a5 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.c
@@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx)
{
struct select_cpu_dispatch_bad_dsq *skel;
- skel = select_cpu_dispatch_bad_dsq__open_and_load();
- SCX_FAIL_IF(!skel, "Failed to open and load skel");
+ skel = select_cpu_dispatch_bad_dsq__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(select_cpu_dispatch_bad_dsq__load(skel), "Failed to load skel");
+
*ctx = skel;
return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c
index 48ff028a3c46..aa85949478bc 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.c
@@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx)
{
struct select_cpu_dispatch_dbl_dsp *skel;
- skel = select_cpu_dispatch_dbl_dsp__open_and_load();
- SCX_FAIL_IF(!skel, "Failed to open and load skel");
+ skel = select_cpu_dispatch_dbl_dsp__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(select_cpu_dispatch_dbl_dsp__load(skel), "Failed to load skel");
+
*ctx = skel;
return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.c
index b4629c2364f5..1e9b5c9bfff1 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_vtime.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.c
@@ -15,8 +15,11 @@ static enum scx_test_status setup(void **ctx)
{
struct select_cpu_vtime *skel;
- skel = select_cpu_vtime__open_and_load();
- SCX_FAIL_IF(!skel, "Failed to open and load skel");
+ skel = select_cpu_vtime__open();
+ SCX_FAIL_IF(!skel, "Failed to open");
+ SCX_ENUM_INIT(skel);
+ SCX_FAIL_IF(select_cpu_vtime__load(skel), "Failed to load skel");
+
*ctx = skel;
return SCX_TEST_PASS;
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 14ba51b52095..b2f76a52215a 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -155,6 +155,12 @@ struct seccomp_data {
# endif
#endif
+#ifndef __NR_uretprobe
+# if defined(__x86_64__)
+# define __NR_uretprobe 335
+# endif
+#endif
+
#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
index 7126ec3485cb..2b61d8d79bde 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
@@ -61,5 +61,30 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "4009",
+ "name": "Reject creation of DRR class with classid TC_H_ROOT",
+ "category": [
+ "qdisc",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle ffff: drr",
+ "$TC filter add dev $DUMMY parent ffff: basic classid ffff:1",
+ "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr",
+ "$TC filter add dev $DUMMY parent ffff: prio 1 u32 match u16 0x0000 0xfe00 at 2 flowid ffff:ffff"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent ffff: classid ffff:ffff drr",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class drr ffff:ffff",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root"
+ ]
}
]
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
index 2fe5e983cb22..f89d052c730e 100644
--- a/tools/testing/selftests/vDSO/parse_vdso.c
+++ b/tools/testing/selftests/vDSO/parse_vdso.c
@@ -53,7 +53,7 @@ static struct vdso_info
/* Symbol table */
ELF(Sym) *symtab;
const char *symstrings;
- ELF(Word) *gnu_hash;
+ ELF(Word) *gnu_hash, *gnu_bucket;
ELF_HASH_ENTRY *bucket, *chain;
ELF_HASH_ENTRY nbucket, nchain;
@@ -185,8 +185,8 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base)
/* The bucket array is located after the header (4 uint32) and the bloom
* filter (size_t array of gnu_hash[2] elements).
*/
- vdso_info.bucket = vdso_info.gnu_hash + 4 +
- sizeof(size_t) / 4 * vdso_info.gnu_hash[2];
+ vdso_info.gnu_bucket = vdso_info.gnu_hash + 4 +
+ sizeof(size_t) / 4 * vdso_info.gnu_hash[2];
} else {
vdso_info.nbucket = hash[0];
vdso_info.nchain = hash[1];
@@ -268,11 +268,11 @@ void *vdso_sym(const char *version, const char *name)
if (vdso_info.gnu_hash) {
uint32_t h1 = gnu_hash(name), h2, *hashval;
- i = vdso_info.bucket[h1 % vdso_info.nbucket];
+ i = vdso_info.gnu_bucket[h1 % vdso_info.nbucket];
if (i == 0)
return 0;
h1 |= 1;
- hashval = vdso_info.bucket + vdso_info.nbucket +
+ hashval = vdso_info.gnu_bucket + vdso_info.nbucket +
(i - vdso_info.gnu_hash[1]);
for (;; i++) {
ELF(Sym) *sym = &vdso_info.symtab[i];
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
index 139fd9aa8b12..c305d2f613f0 100644
--- a/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -27,7 +27,6 @@ CONFIG_DEBUG_KMEMLEAK=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_SHIRQ=y
CONFIG_WQ_WATCHDOG=y
-CONFIG_SCHED_DEBUG=y
CONFIG_SCHED_INFO=y
CONFIG_SCHEDSTATS=y
CONFIG_SCHED_STACK_END_CHECK=y
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index d51249f14e2f..28422c32cc8f 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -19,7 +19,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
- corrupt_xstate_header amx lam test_shadow_stack
+ corrupt_xstate_header amx lam test_shadow_stack avx
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
@@ -132,3 +132,7 @@ $(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
$(OUTPUT)/nx_stack_32: CFLAGS += -Wl,-z,noexecstack
$(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack
+
+$(OUTPUT)/avx_64: CFLAGS += -mno-avx -mno-avx512f
+$(OUTPUT)/amx_64: EXTRA_FILES += xstate.c
+$(OUTPUT)/avx_64: EXTRA_FILES += xstate.c
diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c
index 1fdf35a4d7f6..40769c16de1b 100644
--- a/tools/testing/selftests/x86/amx.c
+++ b/tools/testing/selftests/x86/amx.c
@@ -3,7 +3,6 @@
#define _GNU_SOURCE
#include <err.h>
#include <errno.h>
-#include <pthread.h>
#include <setjmp.h>
#include <stdio.h>
#include <string.h>
@@ -14,169 +13,27 @@
#include <sys/auxv.h>
#include <sys/mman.h>
#include <sys/shm.h>
-#include <sys/ptrace.h>
#include <sys/syscall.h>
#include <sys/wait.h>
-#include <sys/uio.h>
-#include "../kselftest.h" /* For __cpuid_count() */
+#include "helpers.h"
+#include "xstate.h"
#ifndef __x86_64__
# error This test is 64-bit only
#endif
-#define XSAVE_HDR_OFFSET 512
-#define XSAVE_HDR_SIZE 64
-
-struct xsave_buffer {
- union {
- struct {
- char legacy[XSAVE_HDR_OFFSET];
- char header[XSAVE_HDR_SIZE];
- char extended[0];
- };
- char bytes[0];
- };
-};
-
-static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
-{
- uint32_t rfbm_lo = rfbm;
- uint32_t rfbm_hi = rfbm >> 32;
-
- asm volatile("xsave (%%rdi)"
- : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)
- : "memory");
-}
-
-static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm)
-{
- uint32_t rfbm_lo = rfbm;
- uint32_t rfbm_hi = rfbm >> 32;
-
- asm volatile("xrstor (%%rdi)"
- : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi));
-}
-
/* err() exits and will not return */
#define fatal_error(msg, ...) err(1, "[FAIL]\t" msg, ##__VA_ARGS__)
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
-
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- fatal_error("sigaction");
-}
-
-static void clearhandler(int sig)
-{
- struct sigaction sa;
-
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- fatal_error("sigaction");
-}
-
-#define XFEATURE_XTILECFG 17
-#define XFEATURE_XTILEDATA 18
#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG)
#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA)
#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)
-#define CPUID_LEAF1_ECX_XSAVE_MASK (1 << 26)
-#define CPUID_LEAF1_ECX_OSXSAVE_MASK (1 << 27)
-
-static uint32_t xbuf_size;
-
-static struct {
- uint32_t xbuf_offset;
- uint32_t size;
-} xtiledata;
-
-#define CPUID_LEAF_XSTATE 0xd
-#define CPUID_SUBLEAF_XSTATE_USER 0x0
-#define TILE_CPUID 0x1d
-#define TILE_PALETTE_ID 0x1
-
-static void check_cpuid_xtiledata(void)
-{
- uint32_t eax, ebx, ecx, edx;
-
- __cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER,
- eax, ebx, ecx, edx);
-
- /*
- * EBX enumerates the size (in bytes) required by the XSAVE
- * instruction for an XSAVE area containing all the user state
- * components corresponding to bits currently set in XCR0.
- *
- * Stash that off so it can be used to allocate buffers later.
- */
- xbuf_size = ebx;
-
- __cpuid_count(CPUID_LEAF_XSTATE, XFEATURE_XTILEDATA,
- eax, ebx, ecx, edx);
- /*
- * eax: XTILEDATA state component size
- * ebx: XTILEDATA state component offset in user buffer
- */
- if (!eax || !ebx)
- fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
- eax, ebx);
-
- xtiledata.size = eax;
- xtiledata.xbuf_offset = ebx;
-}
+struct xstate_info xtiledata;
/* The helpers for managing XSAVE buffer and tile states: */
-struct xsave_buffer *alloc_xbuf(void)
-{
- struct xsave_buffer *xbuf;
-
- /* XSAVE buffer should be 64B-aligned. */
- xbuf = aligned_alloc(64, xbuf_size);
- if (!xbuf)
- fatal_error("aligned_alloc()");
- return xbuf;
-}
-
-static inline void clear_xstate_header(struct xsave_buffer *buffer)
-{
- memset(&buffer->header, 0, sizeof(buffer->header));
-}
-
-static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv)
-{
- /* XSTATE_BV is at the beginning of the header: */
- *(uint64_t *)(&buffer->header) = bv;
-}
-
-static void set_rand_tiledata(struct xsave_buffer *xbuf)
-{
- int *ptr = (int *)&xbuf->bytes[xtiledata.xbuf_offset];
- int data;
- int i;
-
- /*
- * Ensure that 'data' is never 0. This ensures that
- * the registers are never in their initial configuration
- * and thus never tracked as being in the init state.
- */
- data = rand() | 1;
-
- for (i = 0; i < xtiledata.size / sizeof(int); i++, ptr++)
- *ptr = data;
-}
-
struct xsave_buffer *stashed_xsave;
static void init_stashed_xsave(void)
@@ -192,21 +49,6 @@ static void free_stashed_xsave(void)
free(stashed_xsave);
}
-/* See 'struct _fpx_sw_bytes' at sigcontext.h */
-#define SW_BYTES_OFFSET 464
-/* N.B. The struct's field name varies so read from the offset. */
-#define SW_BYTES_BV_OFFSET (SW_BYTES_OFFSET + 8)
-
-static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *buffer)
-{
- return (struct _fpx_sw_bytes *)(buffer + SW_BYTES_OFFSET);
-}
-
-static inline uint64_t get_fpx_sw_bytes_features(void *buffer)
-{
- return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET);
-}
-
/* Work around printf() being unsafe in signals: */
#define SIGNAL_BUF_LEN 1000
char signal_message_buffer[SIGNAL_BUF_LEN];
@@ -304,17 +146,10 @@ static inline bool load_rand_tiledata(struct xsave_buffer *xbuf)
{
clear_xstate_header(xbuf);
set_xstatebv(xbuf, XFEATURE_MASK_XTILEDATA);
- set_rand_tiledata(xbuf);
+ set_rand_data(&xtiledata, xbuf);
return xrstor_safe(xbuf, XFEATURE_MASK_XTILEDATA);
}
-/* Return XTILEDATA to its initial configuration. */
-static inline void init_xtiledata(void)
-{
- clear_xstate_header(stashed_xsave);
- xrstor_safe(stashed_xsave, XFEATURE_MASK_XTILEDATA);
-}
-
enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED };
/* arch_prctl() and sigaltstack() test */
@@ -587,14 +422,6 @@ static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1)
return true;
}
-static inline void validate_tiledata_regs_same(struct xsave_buffer *xbuf)
-{
- int ret = __validate_tiledata_regs(xbuf);
-
- if (ret != 0)
- fatal_error("TILEDATA registers changed");
-}
-
static inline void validate_tiledata_regs_changed(struct xsave_buffer *xbuf)
{
int ret = __validate_tiledata_regs(xbuf);
@@ -651,251 +478,6 @@ static void test_fork(void)
_exit(0);
}
-/* Context switching test */
-
-static struct _ctxtswtest_cfg {
- unsigned int iterations;
- unsigned int num_threads;
-} ctxtswtest_config;
-
-struct futex_info {
- pthread_t thread;
- int nr;
- pthread_mutex_t mutex;
- struct futex_info *next;
-};
-
-static void *check_tiledata(void *info)
-{
- struct futex_info *finfo = (struct futex_info *)info;
- struct xsave_buffer *xbuf;
- int i;
-
- xbuf = alloc_xbuf();
- if (!xbuf)
- fatal_error("unable to allocate XSAVE buffer");
-
- /*
- * Load random data into 'xbuf' and then restore
- * it to the tile registers themselves.
- */
- load_rand_tiledata(xbuf);
- for (i = 0; i < ctxtswtest_config.iterations; i++) {
- pthread_mutex_lock(&finfo->mutex);
-
- /*
- * Ensure the register values have not
- * diverged from those recorded in 'xbuf'.
- */
- validate_tiledata_regs_same(xbuf);
-
- /* Load new, random values into xbuf and registers */
- load_rand_tiledata(xbuf);
-
- /*
- * The last thread's last unlock will be for
- * thread 0's mutex. However, thread 0 will
- * have already exited the loop and the mutex
- * will already be unlocked.
- *
- * Because this is not an ERRORCHECK mutex,
- * that inconsistency will be silently ignored.
- */
- pthread_mutex_unlock(&finfo->next->mutex);
- }
-
- free(xbuf);
- /*
- * Return this thread's finfo, which is
- * a unique value for this thread.
- */
- return finfo;
-}
-
-static int create_threads(int num, struct futex_info *finfo)
-{
- int i;
-
- for (i = 0; i < num; i++) {
- int next_nr;
-
- finfo[i].nr = i;
- /*
- * Thread 'i' will wait on this mutex to
- * be unlocked. Lock it immediately after
- * initialization:
- */
- pthread_mutex_init(&finfo[i].mutex, NULL);
- pthread_mutex_lock(&finfo[i].mutex);
-
- next_nr = (i + 1) % num;
- finfo[i].next = &finfo[next_nr];
-
- if (pthread_create(&finfo[i].thread, NULL, check_tiledata, &finfo[i]))
- fatal_error("pthread_create()");
- }
- return 0;
-}
-
-static void affinitize_cpu0(void)
-{
- cpu_set_t cpuset;
-
- CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
-
- if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
- fatal_error("sched_setaffinity to CPU 0");
-}
-
-static void test_context_switch(void)
-{
- struct futex_info *finfo;
- int i;
-
- /* Affinitize to one CPU to force context switches */
- affinitize_cpu0();
-
- req_xtiledata_perm();
-
- printf("[RUN]\tCheck tiledata context switches, %d iterations, %d threads.\n",
- ctxtswtest_config.iterations,
- ctxtswtest_config.num_threads);
-
-
- finfo = malloc(sizeof(*finfo) * ctxtswtest_config.num_threads);
- if (!finfo)
- fatal_error("malloc()");
-
- create_threads(ctxtswtest_config.num_threads, finfo);
-
- /*
- * This thread wakes up thread 0
- * Thread 0 will wake up 1
- * Thread 1 will wake up 2
- * ...
- * the last thread will wake up 0
- *
- * ... this will repeat for the configured
- * number of iterations.
- */
- pthread_mutex_unlock(&finfo[0].mutex);
-
- /* Wait for all the threads to finish: */
- for (i = 0; i < ctxtswtest_config.num_threads; i++) {
- void *thread_retval;
- int rc;
-
- rc = pthread_join(finfo[i].thread, &thread_retval);
-
- if (rc)
- fatal_error("pthread_join() failed for thread %d err: %d\n",
- i, rc);
-
- if (thread_retval != &finfo[i])
- fatal_error("unexpected thread retval for thread %d: %p\n",
- i, thread_retval);
-
- }
-
- printf("[OK]\tNo incorrect case was found.\n");
-
- free(finfo);
-}
-
-/* Ptrace test */
-
-/*
- * Make sure the ptracee has the expanded kernel buffer on the first
- * use. Then, initialize the state before performing the state
- * injection from the ptracer.
- */
-static inline void ptracee_firstuse_tiledata(void)
-{
- load_rand_tiledata(stashed_xsave);
- init_xtiledata();
-}
-
-/*
- * Ptracer injects the randomized tile data state. It also reads
- * before and after that, which will execute the kernel's state copy
- * functions. So, the tester is advised to double-check any emitted
- * kernel messages.
- */
-static void ptracer_inject_tiledata(pid_t target)
-{
- struct xsave_buffer *xbuf;
- struct iovec iov;
-
- xbuf = alloc_xbuf();
- if (!xbuf)
- fatal_error("unable to allocate XSAVE buffer");
-
- printf("\tRead the init'ed tiledata via ptrace().\n");
-
- iov.iov_base = xbuf;
- iov.iov_len = xbuf_size;
-
- memset(stashed_xsave, 0, xbuf_size);
-
- if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
- fatal_error("PTRACE_GETREGSET");
-
- if (!__compare_tiledata_state(stashed_xsave, xbuf))
- printf("[OK]\tThe init'ed tiledata was read from ptracee.\n");
- else
- printf("[FAIL]\tThe init'ed tiledata was not read from ptracee.\n");
-
- printf("\tInject tiledata via ptrace().\n");
-
- load_rand_tiledata(xbuf);
-
- memcpy(&stashed_xsave->bytes[xtiledata.xbuf_offset],
- &xbuf->bytes[xtiledata.xbuf_offset],
- xtiledata.size);
-
- if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
- fatal_error("PTRACE_SETREGSET");
-
- if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
- fatal_error("PTRACE_GETREGSET");
-
- if (!__compare_tiledata_state(stashed_xsave, xbuf))
- printf("[OK]\tTiledata was correctly written to ptracee.\n");
- else
- printf("[FAIL]\tTiledata was not correctly written to ptracee.\n");
-}
-
-static void test_ptrace(void)
-{
- pid_t child;
- int status;
-
- child = fork();
- if (child < 0) {
- err(1, "fork");
- } else if (!child) {
- if (ptrace(PTRACE_TRACEME, 0, NULL, NULL))
- err(1, "PTRACE_TRACEME");
-
- ptracee_firstuse_tiledata();
-
- raise(SIGTRAP);
- _exit(0);
- }
-
- do {
- wait(&status);
- } while (WSTOPSIG(status) != SIGTRAP);
-
- ptracer_inject_tiledata(child);
-
- ptrace(PTRACE_DETACH, child, NULL, NULL);
- wait(&status);
- if (!WIFEXITED(status) || WEXITSTATUS(status))
- err(1, "ptrace test");
-}
-
int main(void)
{
unsigned long features;
@@ -907,7 +489,11 @@ int main(void)
return KSFT_SKIP;
}
- check_cpuid_xtiledata();
+ xtiledata = get_xstate_info(XFEATURE_XTILEDATA);
+ if (!xtiledata.size || !xtiledata.xbuf_offset) {
+ fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
+ xtiledata.size, xtiledata.xbuf_offset);
+ }
init_stashed_xsave();
sethandler(SIGILL, handle_noperm, 0);
@@ -919,11 +505,11 @@ int main(void)
test_fork();
- ctxtswtest_config.iterations = 10;
- ctxtswtest_config.num_threads = 5;
- test_context_switch();
-
- test_ptrace();
+ /*
+ * Perform generic xstate tests for context switching, ptrace,
+ * and signal.
+ */
+ test_xstate(XFEATURE_XTILEDATA);
clearhandler(SIGILL);
free_stashed_xsave();
diff --git a/tools/testing/selftests/x86/avx.c b/tools/testing/selftests/x86/avx.c
new file mode 100644
index 000000000000..11d5367c235f
--- /dev/null
+++ b/tools/testing/selftests/x86/avx.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE /* Required for inline xstate helpers */
+#include "xstate.h"
+
+int main(void)
+{
+ test_xstate(XFEATURE_YMM);
+ test_xstate(XFEATURE_OPMASK);
+ test_xstate(XFEATURE_ZMM_Hi256);
+ test_xstate(XFEATURE_Hi16_ZMM);
+}
diff --git a/tools/testing/selftests/x86/corrupt_xstate_header.c b/tools/testing/selftests/x86/corrupt_xstate_header.c
index cf9ce8fbb656..93a89a5997ca 100644
--- a/tools/testing/selftests/x86/corrupt_xstate_header.c
+++ b/tools/testing/selftests/x86/corrupt_xstate_header.c
@@ -18,6 +18,7 @@
#include <sys/wait.h>
#include "../kselftest.h" /* For __cpuid_count() */
+#include "helpers.h"
static inline int xsave_enabled(void)
{
@@ -29,19 +30,6 @@ static inline int xsave_enabled(void)
return ecx & (1U << 27);
}
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
-
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static void sigusr1(int sig, siginfo_t *info, void *uc_void)
{
ucontext_t *uc = uc_void;
diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
index d1e919b0c1dc..5cb8393737d0 100644
--- a/tools/testing/selftests/x86/entry_from_vm86.c
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -24,31 +24,11 @@
#include <errno.h>
#include <sys/vm86.h>
+#include "helpers.h"
+
static unsigned long load_addr = 0x10000;
static int nerrs = 0;
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static sig_atomic_t got_signal;
static void sighandler(int sig, siginfo_t *info, void *ctx_void)
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 50cf32de6313..0a75252d31b6 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -28,6 +28,8 @@
#include <sys/wait.h>
#include <setjmp.h>
+#include "helpers.h"
+
#ifndef __x86_64__
# error This test is 64-bit only
#endif
@@ -39,28 +41,6 @@ static unsigned short *shared_scratch;
static int nerrs;
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t*)ctx_void;
diff --git a/tools/testing/selftests/x86/helpers.h b/tools/testing/selftests/x86/helpers.h
index 4ef42c4559a9..6deaad035161 100644
--- a/tools/testing/selftests/x86/helpers.h
+++ b/tools/testing/selftests/x86/helpers.h
@@ -2,8 +2,13 @@
#ifndef __SELFTESTS_X86_HELPERS_H
#define __SELFTESTS_X86_HELPERS_H
+#include <signal.h>
+#include <string.h>
+
#include <asm/processor-flags.h>
+#include "../kselftest.h"
+
static inline unsigned long get_eflags(void)
{
#ifdef __x86_64__
@@ -22,4 +27,27 @@ static inline void set_eflags(unsigned long eflags)
#endif
}
+static inline void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ ksft_exit_fail_msg("sigaction failed");
+}
+
+static inline void clearhandler(int sig)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ ksft_exit_fail_msg("sigaction failed");
+}
+
#endif /* __SELFTESTS_X86_HELPERS_H */
diff --git a/tools/testing/selftests/x86/ioperm.c b/tools/testing/selftests/x86/ioperm.c
index 57ec5e99edb9..69d5fb7050c2 100644
--- a/tools/testing/selftests/x86/ioperm.c
+++ b/tools/testing/selftests/x86/ioperm.c
@@ -20,30 +20,9 @@
#include <sched.h>
#include <sys/io.h>
-static int nerrs = 0;
-
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-
-}
+#include "helpers.h"
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
+static int nerrs = 0;
static jmp_buf jmpbuf;
diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c
index 7e3e09c1abac..457b6715542b 100644
--- a/tools/testing/selftests/x86/iopl.c
+++ b/tools/testing/selftests/x86/iopl.c
@@ -20,30 +20,9 @@
#include <sched.h>
#include <sys/io.h>
-static int nerrs = 0;
-
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-
-}
+#include "helpers.h"
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
+static int nerrs = 0;
static jmp_buf jmpbuf;
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
index 4d4a76532dc9..18d736640ece 100644
--- a/tools/testing/selftests/x86/lam.c
+++ b/tools/testing/selftests/x86/lam.c
@@ -4,6 +4,7 @@
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
+#include <sys/ioctl.h>
#include <time.h>
#include <signal.h>
#include <setjmp.h>
@@ -43,7 +44,15 @@
#define FUNC_INHERITE 0x20
#define FUNC_PASID 0x40
+/* get_user() pointer test cases */
+#define GET_USER_USER 0
+#define GET_USER_KERNEL_TOP 1
+#define GET_USER_KERNEL_BOT 2
+#define GET_USER_KERNEL 3
+
#define TEST_MASK 0x7f
+#define L5_SIGN_EXT_MASK (0xFFUL << 56)
+#define L4_SIGN_EXT_MASK (0x1FFFFUL << 47)
#define LOW_ADDR (0x1UL << 30)
#define HIGH_ADDR (0x3UL << 48)
@@ -115,23 +124,42 @@ static void segv_handler(int sig)
siglongjmp(segv_env, 1);
}
-static inline int cpu_has_lam(void)
+static inline int lam_is_available(void)
{
unsigned int cpuinfo[4];
+ unsigned long bits = 0;
+ int ret;
__cpuid_count(0x7, 1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
- return (cpuinfo[0] & (1 << 26));
+ /* Check if cpu supports LAM */
+ if (!(cpuinfo[0] & (1 << 26))) {
+ ksft_print_msg("LAM is not supported!\n");
+ return 0;
+ }
+
+ /* Return 0 if CONFIG_ADDRESS_MASKING is not set */
+ ret = syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits);
+ if (ret) {
+ ksft_print_msg("LAM is disabled in the kernel!\n");
+ return 0;
+ }
+
+ return 1;
}
-/* Check 5-level page table feature in CPUID.(EAX=07H, ECX=00H):ECX.[bit 16] */
-static inline int cpu_has_la57(void)
+static inline int la57_enabled(void)
{
- unsigned int cpuinfo[4];
+ int ret;
+ void *p;
+
+ p = mmap((void *)HIGH_ADDR, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
- __cpuid_count(0x7, 0, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
+ ret = p == MAP_FAILED ? 0 : 1;
- return (cpuinfo[2] & (1 << 16));
+ munmap(p, PAGE_SIZE);
+ return ret;
}
/*
@@ -322,7 +350,7 @@ static int handle_mmap(struct testcases *test)
flags, -1, 0);
if (ptr == MAP_FAILED) {
if (test->addr == HIGH_ADDR)
- if (!cpu_has_la57())
+ if (!la57_enabled())
return 3; /* unsupport LA57 */
return 1;
}
@@ -370,6 +398,78 @@ static int handle_syscall(struct testcases *test)
return ret;
}
+static int get_user_syscall(struct testcases *test)
+{
+ uint64_t ptr_address, bitmask;
+ int fd, ret = 0;
+ void *ptr;
+
+ if (la57_enabled()) {
+ bitmask = L5_SIGN_EXT_MASK;
+ ptr_address = HIGH_ADDR;
+ } else {
+ bitmask = L4_SIGN_EXT_MASK;
+ ptr_address = LOW_ADDR;
+ }
+
+ ptr = mmap((void *)ptr_address, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+
+ if (ptr == MAP_FAILED) {
+ perror("failed to map byte to pass into get_user");
+ return 1;
+ }
+
+ if (set_lam(test->lam) != 0) {
+ ret = 2;
+ goto error;
+ }
+
+ fd = memfd_create("lam_ioctl", 0);
+ if (fd == -1) {
+ munmap(ptr, PAGE_SIZE);
+ exit(EXIT_FAILURE);
+ }
+
+ switch (test->later) {
+ case GET_USER_USER:
+ /* Control group - properly tagged user pointer */
+ ptr = (void *)set_metadata((uint64_t)ptr, test->lam);
+ break;
+ case GET_USER_KERNEL_TOP:
+ /* Kernel address with top bit cleared */
+ bitmask &= (bitmask >> 1);
+ ptr = (void *)((uint64_t)ptr | bitmask);
+ break;
+ case GET_USER_KERNEL_BOT:
+ /* Kernel address with bottom sign-extension bit cleared */
+ bitmask &= (bitmask << 1);
+ ptr = (void *)((uint64_t)ptr | bitmask);
+ break;
+ case GET_USER_KERNEL:
+ /* Try to pass a kernel address */
+ ptr = (void *)((uint64_t)ptr | bitmask);
+ break;
+ default:
+ printf("Invalid test case value passed!\n");
+ break;
+ }
+
+ /*
+ * Use FIOASYNC ioctl because it utilizes get_user() internally and is
+ * very non-invasive to the system. Pass differently tagged pointers to
+ * get_user() in order to verify that valid user pointers are going
+ * through and invalid kernel/non-canonical pointers are not.
+ */
+ if (ioctl(fd, FIOASYNC, ptr) != 0)
+ ret = 1;
+
+ close(fd);
+error:
+ munmap(ptr, PAGE_SIZE);
+ return ret;
+}
+
int sys_uring_setup(unsigned int entries, struct io_uring_params *p)
{
return (int)syscall(__NR_io_uring_setup, entries, p);
@@ -596,8 +696,10 @@ int do_uring(unsigned long lam)
fi->file_fd = file_fd;
ring = malloc(sizeof(*ring));
- if (!ring)
+ if (!ring) {
+ free(fi);
return 1;
+ }
memset(ring, 0, sizeof(struct io_ring));
@@ -883,6 +985,33 @@ static struct testcases syscall_cases[] = {
.test_func = handle_syscall,
.msg = "SYSCALL:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
},
+ {
+ .later = GET_USER_USER,
+ .lam = LAM_U57_BITS,
+ .test_func = get_user_syscall,
+ .msg = "GET_USER: get_user() and pass a properly tagged user pointer.\n",
+ },
+ {
+ .later = GET_USER_KERNEL_TOP,
+ .expected = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = get_user_syscall,
+ .msg = "GET_USER:[Negative] get_user() with a kernel pointer and the top bit cleared.\n",
+ },
+ {
+ .later = GET_USER_KERNEL_BOT,
+ .expected = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = get_user_syscall,
+ .msg = "GET_USER:[Negative] get_user() with a kernel pointer and the bottom sign-extension bit cleared.\n",
+ },
+ {
+ .later = GET_USER_KERNEL,
+ .expected = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = get_user_syscall,
+ .msg = "GET_USER:[Negative] get_user() and pass a kernel pointer.\n",
+ },
};
static struct testcases mmap_cases[] = {
@@ -1181,10 +1310,8 @@ int main(int argc, char **argv)
tests_cnt = 0;
- if (!cpu_has_lam()) {
- ksft_print_msg("Unsupported LAM feature!\n");
+ if (!lam_is_available())
return KSFT_SKIP;
- }
while ((c = getopt(argc, argv, "ht:")) != -1) {
switch (c) {
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 3a29346e1452..bb99a71380a5 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -26,6 +26,8 @@
#include <asm/prctl.h>
#include <sys/prctl.h>
+#include "helpers.h"
+
#define AR_ACCESSED (1<<8)
#define AR_TYPE_RODATA (0 * (1<<9))
@@ -506,20 +508,6 @@ static void fix_sa_restorer(int sig)
}
#endif
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-
- fix_sa_restorer(sig);
-}
-
static jmp_buf jmpbuf;
static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
@@ -549,9 +537,11 @@ static void do_multicpu_tests(void)
}
sethandler(SIGSEGV, sigsegv, 0);
+ fix_sa_restorer(SIGSEGV);
#ifdef __i386__
/* True 32-bit kernels send SIGILL instead of SIGSEGV on IRET faults. */
sethandler(SIGILL, sigsegv, 0);
+ fix_sa_restorer(SIGILL);
#endif
printf("[RUN]\tCross-CPU LDT invalidation\n");
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
index cc3de6ff9fba..f22cb6b382f9 100644
--- a/tools/testing/selftests/x86/mov_ss_trap.c
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -36,7 +36,7 @@
#include <setjmp.h>
#include <sys/prctl.h>
-#define X86_EFLAGS_RF (1UL << 16)
+#include "helpers.h"
#if __x86_64__
# define REG_IP REG_RIP
@@ -94,18 +94,6 @@ static void enable_watchpoint(void)
}
}
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static char const * const signames[] = {
[SIGSEGV] = "SIGSEGV",
[SIGBUS] = "SIBGUS",
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index 12aaa063196e..360ec88d5432 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -15,6 +15,8 @@
#include <asm/ptrace-abi.h>
#include <sys/auxv.h>
+#include "helpers.h"
+
/* Bitness-agnostic defines for user_regs_struct fields. */
#ifdef __x86_64__
# define user_syscall_nr orig_rax
@@ -93,18 +95,6 @@ static siginfo_t wait_trap(pid_t chld)
return si;
}
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static void setsigign(int sig, int flags)
{
struct sigaction sa;
@@ -116,16 +106,6 @@ static void setsigign(int sig, int flags)
err(1, "sigaction");
}
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
#ifdef __x86_64__
# define REG_BP REG_RBP
#else
diff --git a/tools/testing/selftests/x86/sigaltstack.c b/tools/testing/selftests/x86/sigaltstack.c
index f689af75e979..0ae1b784498c 100644
--- a/tools/testing/selftests/x86/sigaltstack.c
+++ b/tools/testing/selftests/x86/sigaltstack.c
@@ -14,6 +14,8 @@
#include <sys/resource.h>
#include <setjmp.h>
+#include "helpers.h"
+
/* sigaltstack()-enforced minimum stack */
#define ENFORCED_MINSIGSTKSZ 2048
@@ -27,30 +29,6 @@ static bool sigalrm_expected;
static unsigned long at_minstack_size;
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
-
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
- struct sigaction sa;
-
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static int setup_altstack(void *start, unsigned long size)
{
stack_t ss;
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 0b75b29f794b..26ef562f4232 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -46,6 +46,8 @@
#include <sys/ptrace.h>
#include <sys/user.h>
+#include "helpers.h"
+
/* Pull in AR_xyz defines. */
typedef unsigned int u32;
typedef unsigned short u16;
@@ -138,28 +140,6 @@ static unsigned short LDT3(int idx)
return (idx << 3) | 7;
}
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static void add_ldt(const struct user_desc *desc, unsigned short *var,
const char *name)
{
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 9a30f443e928..280d7a22b9c9 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -33,28 +33,6 @@
#include "helpers.h"
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static volatile sig_atomic_t sig_traps, sig_eflags;
sigjmp_buf jmpbuf;
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index 48ab065a76f9..f67a2df335ba 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -17,18 +17,6 @@
#include "helpers.h"
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static sigjmp_buf jmpbuf;
static volatile sig_atomic_t n_errs;
diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c
index a108b80dd082..f9c9814160f0 100644
--- a/tools/testing/selftests/x86/syscall_nt.c
+++ b/tools/testing/selftests/x86/syscall_nt.c
@@ -18,18 +18,6 @@
static unsigned int nerrs;
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
{
}
diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
index 991591718bb0..41c42b7b54a6 100644
--- a/tools/testing/selftests/x86/syscall_numbering.c
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -25,6 +25,7 @@
#include <sys/mman.h>
#include <linux/ptrace.h>
+#include "../kselftest.h"
/* Common system call numbers */
#define SYS_READ 0
@@ -313,7 +314,7 @@ static void test_syscall_numbering(void)
* The MSB is supposed to be ignored, so we loop over a few
* to test that out.
*/
- for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
+ for (size_t i = 0; i < ARRAY_SIZE(msbs); i++) {
int msb = msbs[i];
run("Checking system calls with msb = %d (0x%x)\n",
msb, msb);
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c
index b30de9aaa6d4..5fb531e3ad7c 100644
--- a/tools/testing/selftests/x86/sysret_rip.c
+++ b/tools/testing/selftests/x86/sysret_rip.c
@@ -22,6 +22,8 @@
#include <sys/mman.h>
#include <assert.h>
+#include "helpers.h"
+
/*
* These items are in clang_helpers_64.S, in order to avoid clang inline asm
* limitations:
@@ -31,28 +33,6 @@ extern const char test_page[];
static void const *current_test_page_addr = test_page;
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
-static void clearhandler(int sig)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_handler = SIG_DFL;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
/* State used by our signal handlers. */
static gregset_t initial_regs;
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 6de11b4df458..05e1e6774fba 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -310,19 +310,6 @@ static void test_getcpu(int cpu)
static jmp_buf jmpbuf;
static volatile unsigned long segv_err;
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
-
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- ksft_exit_fail_msg("sigaction failed\n");
-}
-
static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t *)ctx_void;
diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c
index 4c311e1af4c7..9cc17588d818 100644
--- a/tools/testing/selftests/x86/unwind_vdso.c
+++ b/tools/testing/selftests/x86/unwind_vdso.c
@@ -43,18 +43,6 @@ int main()
#include <dlfcn.h>
#include <unwind.h>
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
static volatile sig_atomic_t nerrs;
static unsigned long sysinfo;
static bool got_sysinfo = false;
diff --git a/tools/testing/selftests/x86/xstate.c b/tools/testing/selftests/x86/xstate.c
new file mode 100644
index 000000000000..23c1d6c964ea
--- /dev/null
+++ b/tools/testing/selftests/x86/xstate.c
@@ -0,0 +1,477 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <elf.h>
+#include <pthread.h>
+#include <stdbool.h>
+
+#include <asm/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+
+#include "helpers.h"
+#include "xstate.h"
+
+/*
+ * The userspace xstate test suite is designed to be generic and operates
+ * with randomized xstate data. However, some states require special handling:
+ *
+ * - PKRU and XTILECFG need specific adjustments, such as modifying
+ * randomization behavior or using fixed values.
+ * - But, PKRU already has a dedicated test suite in /tools/selftests/mm.
+ * - Legacy states (FP and SSE) are excluded, as they are not considered
+ * part of extended states (xstates) and their usage is already deeply
+ * integrated into user-space libraries.
+ */
+#define XFEATURE_MASK_TEST_SUPPORTED \
+ ((1 << XFEATURE_YMM) | \
+ (1 << XFEATURE_OPMASK) | \
+ (1 << XFEATURE_ZMM_Hi256) | \
+ (1 << XFEATURE_Hi16_ZMM) | \
+ (1 << XFEATURE_XTILEDATA))
+
+static inline uint64_t xgetbv(uint32_t index)
+{
+ uint32_t eax, edx;
+
+ asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
+ return eax + ((uint64_t)edx << 32);
+}
+
+static inline uint64_t get_xstatebv(struct xsave_buffer *xbuf)
+{
+ return *(uint64_t *)(&xbuf->header);
+}
+
+static struct xstate_info xstate;
+
+struct futex_info {
+ unsigned int iterations;
+ struct futex_info *next;
+ pthread_mutex_t mutex;
+ pthread_t thread;
+ bool valid;
+ int nr;
+};
+
+static inline void load_rand_xstate(struct xstate_info *xstate, struct xsave_buffer *xbuf)
+{
+ clear_xstate_header(xbuf);
+ set_xstatebv(xbuf, xstate->mask);
+ set_rand_data(xstate, xbuf);
+ xrstor(xbuf, xstate->mask);
+}
+
+static inline void load_init_xstate(struct xstate_info *xstate, struct xsave_buffer *xbuf)
+{
+ clear_xstate_header(xbuf);
+ xrstor(xbuf, xstate->mask);
+}
+
+static inline void copy_xstate(struct xsave_buffer *xbuf_dst, struct xsave_buffer *xbuf_src)
+{
+ memcpy(&xbuf_dst->bytes[xstate.xbuf_offset],
+ &xbuf_src->bytes[xstate.xbuf_offset],
+ xstate.size);
+}
+
+static inline bool validate_xstate_same(struct xsave_buffer *xbuf1, struct xsave_buffer *xbuf2)
+{
+ int ret;
+
+ ret = memcmp(&xbuf1->bytes[xstate.xbuf_offset],
+ &xbuf2->bytes[xstate.xbuf_offset],
+ xstate.size);
+ return ret == 0;
+}
+
+static inline bool validate_xregs_same(struct xsave_buffer *xbuf1)
+{
+ struct xsave_buffer *xbuf2;
+ bool ret;
+
+ xbuf2 = alloc_xbuf();
+ if (!xbuf2)
+ ksft_exit_fail_msg("failed to allocate XSAVE buffer\n");
+
+ xsave(xbuf2, xstate.mask);
+ ret = validate_xstate_same(xbuf1, xbuf2);
+
+ free(xbuf2);
+ return ret;
+}
+
+/* Context switching test */
+
+static void *check_xstate(void *info)
+{
+ struct futex_info *finfo = (struct futex_info *)info;
+ struct xsave_buffer *xbuf;
+ int i;
+
+ xbuf = alloc_xbuf();
+ if (!xbuf)
+ ksft_exit_fail_msg("unable to allocate XSAVE buffer\n");
+
+ /*
+ * Load random data into 'xbuf' and then restore it to the xstate
+ * registers.
+ */
+ load_rand_xstate(&xstate, xbuf);
+ finfo->valid = true;
+
+ for (i = 0; i < finfo->iterations; i++) {
+ pthread_mutex_lock(&finfo->mutex);
+
+ /*
+ * Ensure the register values have not diverged from the
+ * record. Then reload a new random value. If it failed
+ * ever before, skip it.
+ */
+ if (finfo->valid) {
+ finfo->valid = validate_xregs_same(xbuf);
+ load_rand_xstate(&xstate, xbuf);
+ }
+
+ /*
+ * The last thread's last unlock will be for thread 0's
+ * mutex. However, thread 0 will have already exited the
+ * loop and the mutex will already be unlocked.
+ *
+ * Because this is not an ERRORCHECK mutex, that
+ * inconsistency will be silently ignored.
+ */
+ pthread_mutex_unlock(&finfo->next->mutex);
+ }
+
+ free(xbuf);
+ return finfo;
+}
+
+static void create_threads(uint32_t num_threads, uint32_t iterations, struct futex_info *finfo)
+{
+ int i;
+
+ for (i = 0; i < num_threads; i++) {
+ int next_nr;
+
+ finfo[i].nr = i;
+ finfo[i].iterations = iterations;
+
+ /*
+ * Thread 'i' will wait on this mutex to be unlocked.
+ * Lock it immediately after initialization:
+ */
+ pthread_mutex_init(&finfo[i].mutex, NULL);
+ pthread_mutex_lock(&finfo[i].mutex);
+
+ next_nr = (i + 1) % num_threads;
+ finfo[i].next = &finfo[next_nr];
+
+ if (pthread_create(&finfo[i].thread, NULL, check_xstate, &finfo[i]))
+ ksft_exit_fail_msg("pthread_create() failed\n");
+ }
+}
+
+static bool checkout_threads(uint32_t num_threads, struct futex_info *finfo)
+{
+ void *thread_retval;
+ bool valid = true;
+ int err, i;
+
+ for (i = 0; i < num_threads; i++) {
+ err = pthread_join(finfo[i].thread, &thread_retval);
+ if (err)
+ ksft_exit_fail_msg("pthread_join() failed for thread %d err: %d\n", i, err);
+
+ if (thread_retval != &finfo[i]) {
+ ksft_exit_fail_msg("unexpected thread retval for thread %d: %p\n",
+ i, thread_retval);
+ }
+
+ valid &= finfo[i].valid;
+ }
+
+ return valid;
+}
+
+static void affinitize_cpu0(void)
+{
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ ksft_exit_fail_msg("sched_setaffinity to CPU 0 failed\n");
+}
+
+static void test_context_switch(uint32_t num_threads, uint32_t iterations)
+{
+ struct futex_info *finfo;
+
+ /* Affinitize to one CPU to force context switches */
+ affinitize_cpu0();
+
+ printf("[RUN]\t%s: check context switches, %d iterations, %d threads.\n",
+ xstate.name, iterations, num_threads);
+
+ finfo = malloc(sizeof(*finfo) * num_threads);
+ if (!finfo)
+ ksft_exit_fail_msg("unable allocate memory\n");
+
+ create_threads(num_threads, iterations, finfo);
+
+ /*
+ * This thread wakes up thread 0
+ * Thread 0 will wake up 1
+ * Thread 1 will wake up 2
+ * ...
+ * The last thread will wake up 0
+ *
+ * This will repeat for the configured
+ * number of iterations.
+ */
+ pthread_mutex_unlock(&finfo[0].mutex);
+
+ /* Wait for all the threads to finish: */
+ if (checkout_threads(num_threads, finfo))
+ printf("[OK]\tNo incorrect case was found.\n");
+ else
+ printf("[FAIL]\tFailed with context switching test.\n");
+
+ free(finfo);
+}
+
+/*
+ * Ptrace test for the ABI format as described in arch/x86/include/asm/user.h
+ */
+
+/*
+ * Make sure the ptracee has the expanded kernel buffer on the first use.
+ * Then, initialize the state before performing the state injection from
+ * the ptracer. For non-dynamic states, this is benign.
+ */
+static inline void ptracee_touch_xstate(void)
+{
+ struct xsave_buffer *xbuf;
+
+ xbuf = alloc_xbuf();
+
+ load_rand_xstate(&xstate, xbuf);
+ load_init_xstate(&xstate, xbuf);
+
+ free(xbuf);
+}
+
+/*
+ * Ptracer injects the randomized xstate data. It also reads before and
+ * after that, which will execute the kernel's state copy functions.
+ */
+static void ptracer_inject_xstate(pid_t target)
+{
+ uint32_t xbuf_size = get_xbuf_size();
+ struct xsave_buffer *xbuf1, *xbuf2;
+ struct iovec iov;
+
+ /*
+ * Allocate buffers to keep data while ptracer can write the
+ * other buffer
+ */
+ xbuf1 = alloc_xbuf();
+ xbuf2 = alloc_xbuf();
+ if (!xbuf1 || !xbuf2)
+ ksft_exit_fail_msg("unable to allocate XSAVE buffer\n");
+
+ iov.iov_base = xbuf1;
+ iov.iov_len = xbuf_size;
+
+ if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+ ksft_exit_fail_msg("PTRACE_GETREGSET failed\n");
+
+ printf("[RUN]\t%s: inject xstate via ptrace().\n", xstate.name);
+
+ load_rand_xstate(&xstate, xbuf1);
+ copy_xstate(xbuf2, xbuf1);
+
+ if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+ ksft_exit_fail_msg("PTRACE_SETREGSET failed\n");
+
+ if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+ ksft_exit_fail_msg("PTRACE_GETREGSET failed\n");
+
+ if (*(uint64_t *)get_fpx_sw_bytes(xbuf1) == xgetbv(0))
+ printf("[OK]\t'xfeatures' in SW reserved area was correctly written\n");
+ else
+ printf("[FAIL]\t'xfeatures' in SW reserved area was not correctly written\n");
+
+ if (validate_xstate_same(xbuf2, xbuf1))
+ printf("[OK]\txstate was correctly updated.\n");
+ else
+ printf("[FAIL]\txstate was not correctly updated.\n");
+
+ free(xbuf1);
+ free(xbuf2);
+}
+
+static void test_ptrace(void)
+{
+ pid_t child;
+ int status;
+
+ child = fork();
+ if (child < 0) {
+ ksft_exit_fail_msg("fork() failed\n");
+ } else if (!child) {
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL))
+ ksft_exit_fail_msg("PTRACE_TRACEME failed\n");
+
+ ptracee_touch_xstate();
+
+ raise(SIGTRAP);
+ _exit(0);
+ }
+
+ do {
+ wait(&status);
+ } while (WSTOPSIG(status) != SIGTRAP);
+
+ ptracer_inject_xstate(child);
+
+ ptrace(PTRACE_DETACH, child, NULL, NULL);
+ wait(&status);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ ksft_exit_fail_msg("ptracee exit error\n");
+}
+
+/*
+ * Test signal delivery for the ABI compatibility.
+ * See the ABI format: arch/x86/include/uapi/asm/sigcontext.h
+ */
+
+/*
+ * Avoid using printf() in signal handlers as it is not
+ * async-signal-safe.
+ */
+#define SIGNAL_BUF_LEN 1000
+static char signal_message_buffer[SIGNAL_BUF_LEN];
+static void sig_print(char *msg)
+{
+ int left = SIGNAL_BUF_LEN - strlen(signal_message_buffer) - 1;
+
+ strncat(signal_message_buffer, msg, left);
+}
+
+static struct xsave_buffer *stashed_xbuf;
+
+static void validate_sigfpstate(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+ void *xbuf = ctx->uc_mcontext.fpregs;
+ struct _fpx_sw_bytes *sw_bytes;
+ uint32_t magic2;
+
+ /* Reset the signal message buffer: */
+ signal_message_buffer[0] = '\0';
+
+ sw_bytes = get_fpx_sw_bytes(xbuf);
+ if (sw_bytes->magic1 == FP_XSTATE_MAGIC1)
+ sig_print("[OK]\t'magic1' is valid\n");
+ else
+ sig_print("[FAIL]\t'magic1' is not valid\n");
+
+ if (get_fpx_sw_bytes_features(xbuf) & xstate.mask)
+ sig_print("[OK]\t'xfeatures' in SW reserved area is valid\n");
+ else
+ sig_print("[FAIL]\t'xfeatures' in SW reserved area is not valid\n");
+
+ if (get_xstatebv(xbuf) & xstate.mask)
+ sig_print("[OK]\t'xfeatures' in XSAVE header is valid\n");
+ else
+ sig_print("[FAIL]\t'xfeatures' in XSAVE header is not valid\n");
+
+ if (validate_xstate_same(stashed_xbuf, xbuf))
+ sig_print("[OK]\txstate delivery was successful\n");
+ else
+ sig_print("[FAIL]\txstate delivery was not successful\n");
+
+ magic2 = *(uint32_t *)(xbuf + sw_bytes->xstate_size);
+ if (magic2 == FP_XSTATE_MAGIC2)
+ sig_print("[OK]\t'magic2' is valid\n");
+ else
+ sig_print("[FAIL]\t'magic2' is not valid\n");
+
+ set_rand_data(&xstate, xbuf);
+ copy_xstate(stashed_xbuf, xbuf);
+}
+
+static void test_signal(void)
+{
+ bool valid_xstate;
+
+ /*
+ * The signal handler will access this to verify xstate context
+ * preservation.
+ */
+ stashed_xbuf = alloc_xbuf();
+ if (!stashed_xbuf)
+ ksft_exit_fail_msg("unable to allocate XSAVE buffer\n");
+
+ printf("[RUN]\t%s: load xstate and raise SIGUSR1\n", xstate.name);
+
+ sethandler(SIGUSR1, validate_sigfpstate, 0);
+
+ load_rand_xstate(&xstate, stashed_xbuf);
+
+ raise(SIGUSR1);
+
+ /*
+ * Immediately record the test result, deferring printf() to
+ * prevent unintended state contamination by that.
+ */
+ valid_xstate = validate_xregs_same(stashed_xbuf);
+ printf("%s", signal_message_buffer);
+
+ printf("[RUN]\t%s: load new xstate from sighandler and check it after sigreturn\n",
+ xstate.name);
+
+ if (valid_xstate)
+ printf("[OK]\txstate was restored correctly\n");
+ else
+ printf("[FAIL]\txstate restoration failed\n");
+
+ clearhandler(SIGUSR1);
+ free(stashed_xbuf);
+}
+
+void test_xstate(uint32_t feature_num)
+{
+ const unsigned int ctxtsw_num_threads = 5, ctxtsw_iterations = 10;
+ unsigned long features;
+ long rc;
+
+ if (!(XFEATURE_MASK_TEST_SUPPORTED & (1 << feature_num))) {
+ ksft_print_msg("The xstate test does not fully support the component %u, yet.\n",
+ feature_num);
+ return;
+ }
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &features);
+ if (rc || !(features & (1 << feature_num))) {
+ ksft_print_msg("The kernel does not support feature number: %u\n", feature_num);
+ return;
+ }
+
+ xstate = get_xstate_info(feature_num);
+ if (!xstate.size || !xstate.xbuf_offset) {
+ ksft_exit_fail_msg("invalid state size/offset (%d/%d)\n",
+ xstate.size, xstate.xbuf_offset);
+ }
+
+ test_context_switch(ctxtsw_num_threads, ctxtsw_iterations);
+ test_ptrace();
+ test_signal();
+}
diff --git a/tools/testing/selftests/x86/xstate.h b/tools/testing/selftests/x86/xstate.h
new file mode 100644
index 000000000000..42af36ec852f
--- /dev/null
+++ b/tools/testing/selftests/x86/xstate.h
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef __SELFTESTS_X86_XSTATE_H
+#define __SELFTESTS_X86_XSTATE_H
+
+#include <stdint.h>
+
+#include "../kselftest.h"
+
+#define XSAVE_HDR_OFFSET 512
+#define XSAVE_HDR_SIZE 64
+
+/*
+ * List of XSAVE features Linux knows about. Copied from
+ * arch/x86/include/asm/fpu/types.h
+ */
+enum xfeature {
+ XFEATURE_FP,
+ XFEATURE_SSE,
+ XFEATURE_YMM,
+ XFEATURE_BNDREGS,
+ XFEATURE_BNDCSR,
+ XFEATURE_OPMASK,
+ XFEATURE_ZMM_Hi256,
+ XFEATURE_Hi16_ZMM,
+ XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
+ XFEATURE_PKRU,
+ XFEATURE_PASID,
+ XFEATURE_CET_USER,
+ XFEATURE_CET_KERNEL_UNUSED,
+ XFEATURE_RSRVD_COMP_13,
+ XFEATURE_RSRVD_COMP_14,
+ XFEATURE_LBR,
+ XFEATURE_RSRVD_COMP_16,
+ XFEATURE_XTILECFG,
+ XFEATURE_XTILEDATA,
+
+ XFEATURE_MAX,
+};
+
+/* Copied from arch/x86/kernel/fpu/xstate.c */
+static const char *xfeature_names[] =
+{
+ "x87 floating point registers",
+ "SSE registers",
+ "AVX registers",
+ "MPX bounds registers",
+ "MPX CSR",
+ "AVX-512 opmask",
+ "AVX-512 Hi256",
+ "AVX-512 ZMM_Hi256",
+ "Processor Trace (unused)",
+ "Protection Keys User registers",
+ "PASID state",
+ "Control-flow User registers",
+ "Control-flow Kernel registers (unused)",
+ "unknown xstate feature",
+ "unknown xstate feature",
+ "unknown xstate feature",
+ "unknown xstate feature",
+ "AMX Tile config",
+ "AMX Tile data",
+ "unknown xstate feature",
+};
+
+struct xsave_buffer {
+ union {
+ struct {
+ char legacy[XSAVE_HDR_OFFSET];
+ char header[XSAVE_HDR_SIZE];
+ char extended[0];
+ };
+ char bytes[0];
+ };
+};
+
+static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
+{
+ uint32_t rfbm_hi = rfbm >> 32;
+ uint32_t rfbm_lo = rfbm;
+
+ asm volatile("xsave (%%rdi)"
+ : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)
+ : "memory");
+}
+
+static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm)
+{
+ uint32_t rfbm_hi = rfbm >> 32;
+ uint32_t rfbm_lo = rfbm;
+
+ asm volatile("xrstor (%%rdi)"
+ : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi));
+}
+
+#define CPUID_LEAF_XSTATE 0xd
+#define CPUID_SUBLEAF_XSTATE_USER 0x0
+
+static inline uint32_t get_xbuf_size(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ __cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER,
+ eax, ebx, ecx, edx);
+
+ /*
+ * EBX enumerates the size (in bytes) required by the XSAVE
+ * instruction for an XSAVE area containing all the user state
+ * components corresponding to bits currently set in XCR0.
+ */
+ return ebx;
+}
+
+struct xstate_info {
+ const char *name;
+ uint32_t num;
+ uint32_t mask;
+ uint32_t xbuf_offset;
+ uint32_t size;
+};
+
+static inline struct xstate_info get_xstate_info(uint32_t xfeature_num)
+{
+ struct xstate_info xstate = { };
+ uint32_t eax, ebx, ecx, edx;
+
+ if (xfeature_num >= XFEATURE_MAX) {
+ ksft_print_msg("unknown state\n");
+ return xstate;
+ }
+
+ xstate.name = xfeature_names[xfeature_num];
+ xstate.num = xfeature_num;
+ xstate.mask = 1 << xfeature_num;
+
+ __cpuid_count(CPUID_LEAF_XSTATE, xfeature_num,
+ eax, ebx, ecx, edx);
+ xstate.size = eax;
+ xstate.xbuf_offset = ebx;
+ return xstate;
+}
+
+static inline struct xsave_buffer *alloc_xbuf(void)
+{
+ uint32_t xbuf_size = get_xbuf_size();
+
+ /* XSAVE buffer should be 64B-aligned. */
+ return aligned_alloc(64, xbuf_size);
+}
+
+static inline void clear_xstate_header(struct xsave_buffer *xbuf)
+{
+ memset(&xbuf->header, 0, sizeof(xbuf->header));
+}
+
+static inline void set_xstatebv(struct xsave_buffer *xbuf, uint64_t bv)
+{
+ /* XSTATE_BV is at the beginning of the header: */
+ *(uint64_t *)(&xbuf->header) = bv;
+}
+
+/* See 'struct _fpx_sw_bytes' at sigcontext.h */
+#define SW_BYTES_OFFSET 464
+/* N.B. The struct's field name varies so read from the offset. */
+#define SW_BYTES_BV_OFFSET (SW_BYTES_OFFSET + 8)
+
+static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *xbuf)
+{
+ return xbuf + SW_BYTES_OFFSET;
+}
+
+static inline uint64_t get_fpx_sw_bytes_features(void *buffer)
+{
+ return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET);
+}
+
+static inline void set_rand_data(struct xstate_info *xstate, struct xsave_buffer *xbuf)
+{
+ int *ptr = (int *)&xbuf->bytes[xstate->xbuf_offset];
+ int data, i;
+
+ /*
+ * Ensure that 'data' is never 0. This ensures that
+ * the registers are never in their initial configuration
+ * and thus never tracked as being in the init state.
+ */
+ data = rand() | 1;
+
+ for (i = 0; i < xstate->size / sizeof(int); i++, ptr++)
+ *ptr = data;
+}
+
+/* Testing kernel's context switching and ABI support for the xstate. */
+void test_xstate(uint32_t feature_num);
+
+#endif /* __SELFTESTS_X86_XSTATE_H */
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index dfff8b288265..d0f6d253ac72 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -1788,6 +1788,42 @@ static void test_stream_connect_retry_server(const struct test_opts *opts)
close(fd);
}
+static void test_stream_linger_client(const struct test_opts *opts)
+{
+ struct linger optval = {
+ .l_onoff = 1,
+ .l_linger = 1
+ };
+ int fd;
+
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &optval, sizeof(optval))) {
+ perror("setsockopt(SO_LINGER)");
+ exit(EXIT_FAILURE);
+ }
+
+ close(fd);
+}
+
+static void test_stream_linger_server(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ vsock_wait_remote_close(fd);
+ close(fd);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -1943,6 +1979,11 @@ static struct test_case test_cases[] = {
.run_client = test_stream_connect_retry_client,
.run_server = test_stream_connect_retry_server,
},
+ {
+ .name = "SOCK_STREAM SO_LINGER null-ptr-deref",
+ .run_client = test_stream_linger_client,
+ .run_server = test_stream_linger_server,
+ },
{},
};
diff --git a/tools/thermal/lib/Makefile b/tools/thermal/lib/Makefile
index f2552f73a64c..056d212f25cf 100644
--- a/tools/thermal/lib/Makefile
+++ b/tools/thermal/lib/Makefile
@@ -39,19 +39,6 @@ libdir = $(prefix)/$(libdir_relative)
libdir_SQ = $(subst ','\'',$(libdir))
libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-ifeq ("$(origin V)", "command line")
- VERBOSE = $(V)
-endif
-ifndef VERBOSE
- VERBOSE = 0
-endif
-
-ifeq ($(VERBOSE),1)
- Q =
-else
- Q = @
-endif
-
# Set compile option CFLAGS
ifdef EXTRA_CFLAGS
CFLAGS := $(EXTRA_CFLAGS)
diff --git a/tools/tracing/latency/Makefile b/tools/tracing/latency/Makefile
index 6518b03e05c7..257a56b1899f 100644
--- a/tools/tracing/latency/Makefile
+++ b/tools/tracing/latency/Makefile
@@ -37,12 +37,6 @@ FEATURE_TESTS += libtracefs
FEATURE_DISPLAY := libtraceevent
FEATURE_DISPLAY += libtracefs
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
all: $(LATENCY-COLLECTOR)
include $(srctree)/tools/build/Makefile.include
diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile
index 8b5101457c70..0b61208db604 100644
--- a/tools/tracing/rtla/Makefile
+++ b/tools/tracing/rtla/Makefile
@@ -37,12 +37,6 @@ FEATURE_DISPLAY := libtraceevent
FEATURE_DISPLAY += libtracefs
FEATURE_DISPLAY += libcpupower
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
all: $(RTLA)
include $(srctree)/tools/build/Makefile.include
diff --git a/tools/verification/rv/Makefile b/tools/verification/rv/Makefile
index 411d62b3d8eb..5b898360ba48 100644
--- a/tools/verification/rv/Makefile
+++ b/tools/verification/rv/Makefile
@@ -35,12 +35,6 @@ FEATURE_TESTS += libtracefs
FEATURE_DISPLAY := libtraceevent
FEATURE_DISPLAY += libtracefs
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
all: $(RV)
include $(srctree)/tools/build/Makefile.include