aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-btf.rst48
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst3
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-feature.rst2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-gen.rst9
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-iter.rst2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-link.rst3
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst3
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-net.rst2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-perf.rst2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst36
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst2
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst12
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool66
-rw-r--r--tools/bpf/bpftool/btf.c11
-rw-r--r--tools/bpf/bpftool/btf_dumper.c6
-rw-r--r--tools/bpf/bpftool/cgroup.c3
-rw-r--r--tools/bpf/bpftool/common.c6
-rw-r--r--tools/bpf/bpftool/feature.c1
-rw-r--r--tools/bpf/bpftool/gen.c3
-rw-r--r--tools/bpf/bpftool/iter.c2
-rw-r--r--tools/bpf/bpftool/link.c3
-rw-r--r--tools/bpf/bpftool/main.c3
-rw-r--r--tools/bpf/bpftool/main.h3
-rw-r--r--tools/bpf/bpftool/map.c19
-rw-r--r--tools/bpf/bpftool/net.c1
-rw-r--r--tools/bpf/bpftool/perf.c5
-rw-r--r--tools/bpf/bpftool/prog.c37
-rw-r--r--tools/bpf/bpftool/struct_ops.c2
-rw-r--r--tools/bpf/resolve_btfids/main.c13
-rw-r--r--tools/include/nolibc/nolibc.h15
-rw-r--r--tools/include/uapi/linux/bpf.h119
-rw-r--r--tools/include/uapi/linux/ethtool.h53
-rw-r--r--tools/include/uapi/linux/if_link.h2
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/Makefile10
-rw-r--r--tools/lib/bpf/bpf.c32
-rw-r--r--tools/lib/bpf/bpf.h8
-rw-r--r--tools/lib/bpf/btf.c47
-rw-r--r--tools/lib/bpf/btf.h31
-rw-r--r--tools/lib/bpf/btf_dump.c871
-rw-r--r--tools/lib/bpf/libbpf.c1756
-rw-r--r--tools/lib/bpf/libbpf.h76
-rw-r--r--tools/lib/bpf/libbpf.map11
-rw-r--r--tools/lib/bpf/libbpf_internal.h113
-rw-r--r--tools/lib/bpf/relo_core.c1295
-rw-r--r--tools/lib/bpf/relo_core.h100
-rw-r--r--tools/memory-model/Documentation/access-marking.txt151
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/util/bpf-event.c11
-rw-r--r--tools/perf/util/bpf_counter.c12
-rwxr-xr-xtools/testing/kunit/kunit.py36
-rw-r--r--tools/testing/kunit/kunit_parser.py6
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py29
-rw-r--r--tools/testing/scatterlist/linux/mm.h1
-rw-r--r--tools/testing/scatterlist/main.c38
-rw-r--r--tools/testing/selftests/Makefile2
-rw-r--r--tools/testing/selftests/arm64/fp/.gitignore2
-rw-r--r--tools/testing/selftests/arm64/fp/Makefile11
-rw-r--r--tools/testing/selftests/arm64/fp/TODO4
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl-sve.c14
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl.S10
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl.h8
-rw-r--r--tools/testing/selftests/arm64/fp/sve-probe-vls.c5
-rw-r--r--tools/testing/selftests/arm64/fp/vec-syscfg.c593
-rw-r--r--tools/testing/selftests/arm64/mte/.gitignore1
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.c2
-rw-r--r--tools/testing/selftests/arm64/pauth/pac.c10
-rw-r--r--tools/testing/selftests/arm64/signal/.gitignore1
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals.h2
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.c3
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/TODO2
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c92
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sve_regs.c126
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sve_vl.c68
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.c34
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile7
-rw-r--r--tools/testing/selftests/bpf/README.rst7
-rw-r--r--tools/testing/selftests/bpf/bpf_tcp_helpers.h19
-rw-r--r--tools/testing/selftests/bpf/netcnt_common.h38
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c120
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c98
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c254
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c226
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c106
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c615
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_module.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_autosize.c22
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_btf.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netcnt.c82
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netns_cookie.c80
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_link.c89
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reference_tracking.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c445
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c70
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_pt_regs.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_mim.c77
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bonding.c520
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c105
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c39
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp_release.c26
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter.h8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c72
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_unix.c80
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h10
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_ip_test.c84
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c4
-rw-r--r--tools/testing/selftests/bpf/progs/netcnt_prog.c8
-rw-r--r--tools/testing/selftests/bpf/progs/netns_cookie_prog.c84
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c39
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_sk.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_cookie.c85
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_autosize.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_weak.c56
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_link.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_pt_regs.c29
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c20
-rw-r--r--tools/testing/selftests/bpf/progs/timer.c297
-rw-r--r--tools/testing/selftests/bpf/progs/timer_mim.c88
-rw-r--r--tools/testing/selftests/bpf/progs/timer_mim_reject.c74
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_tx.c2
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool.sh6
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh2
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_synctypes.py586
-rwxr-xr-xtools/testing/selftests/bpf/test_doc_build.sh10
-rw-r--r--tools/testing/selftests/bpf/test_maps.c90
-rw-r--r--tools/testing/selftests/bpf/test_netcnt.c148
-rw-r--r--tools/testing/selftests/bpf/test_progs.c107
-rw-r--r--tools/testing/selftests/bpf/test_progs.h12
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh2
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_veth.sh2
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh10
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c87
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h4
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.c681
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.h63
-rwxr-xr-xtools/testing/selftests/bpf/xsk_prereqs.sh30
-rw-r--r--tools/testing/selftests/cpufreq/config2
-rw-r--r--tools/testing/selftests/filesystems/binderfs/binderfs_test.c17
-rw-r--r--tools/testing/selftests/firmware/fw_namespace.c3
-rw-r--r--tools/testing/selftests/lkdtm/config2
-rw-r--r--tools/testing/selftests/lkdtm/tests.txt3
-rw-r--r--tools/testing/selftests/move_mount_set_group/.gitignore1
-rw-r--r--tools/testing/selftests/move_mount_set_group/Makefile7
-rw-r--r--tools/testing/selftests/move_mount_set_group/config1
-rw-r--r--tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c375
-rw-r--r--tools/testing/selftests/nci/nci_dev.c416
-rw-r--r--tools/testing/selftests/net/Makefile5
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile5
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c437
-rw-r--r--tools/testing/selftests/net/config1
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh33
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh7
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh15
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh27
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh2
-rw-r--r--tools/testing/selftests/net/gro.c1095
-rwxr-xr-xtools/testing/selftests/net/gro.sh99
-rwxr-xr-xtools/testing/selftests/net/ioam6.sh652
-rw-r--r--tools/testing/selftests/net/ioam6_parser.c720
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh345
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c16
-rw-r--r--tools/testing/selftests/net/psock_fanout.c4
-rwxr-xr-xtools/testing/selftests/net/psock_snd.sh3
-rwxr-xr-xtools/testing/selftests/net/run_afpackettests5
-rwxr-xr-xtools/testing/selftests/net/setup_loopback.sh118
-rw-r--r--tools/testing/selftests/net/setup_veth.sh41
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh9
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh9
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh9
-rw-r--r--tools/testing/selftests/net/toeplitz.c585
-rwxr-xr-xtools/testing/selftests/net/toeplitz.sh199
-rwxr-xr-xtools/testing/selftests/net/toeplitz_client.sh28
-rwxr-xr-xtools/testing/selftests/net/unicast_extensions.sh5
-rwxr-xr-xtools/testing/selftests/net/veth.sh183
-rwxr-xr-xtools/testing/selftests/net/vrf_strict_mode_test.sh9
-rw-r--r--tools/testing/selftests/openat2/openat2_test.c4
l---------tools/testing/selftests/powerpc/primitives/asm/extable.h1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c1
-rw-r--r--tools/testing/selftests/powerpc/signal/signal_tm.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-exec.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-fork.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-poison.c2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-resched-dscr.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-stack.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-sigreturn.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-syscall.c2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-tar.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-tmspr.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-trap.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-unavailable.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-vmxcopy.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm.h36
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh10
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kcsan-collapse.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-again.sh4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh106
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh88
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh5
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh30
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote.sh20
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh24
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh49
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh39
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh37
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/RUDE012
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS012
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS032
-rw-r--r--tools/testing/selftests/safesetid/safesetid-test.c2
-rw-r--r--tools/testing/selftests/sched/cs_prctl_test.c2
-rw-r--r--tools/testing/selftests/sync/config1
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json137
-rw-r--r--tools/testing/selftests/tc-testing/tdc_config.py1
-rw-r--r--tools/testing/selftests/vm/.gitignore1
-rw-r--r--tools/testing/selftests/vm/Makefile3
-rw-r--r--tools/testing/selftests/vm/charge_reserved_hugetlb.sh5
-rw-r--r--tools/testing/selftests/vm/hugetlb_reparenting_test.sh5
-rw-r--r--tools/testing/selftests/vm/ksm_tests.c662
-rw-r--r--tools/testing/selftests/vm/mlock-random-test.c2
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests.sh96
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c13
-rw-r--r--tools/testing/selftests/x86/mov_ss_trap.c4
-rw-r--r--tools/testing/selftests/x86/sigreturn.c7
-rw-r--r--tools/testing/selftests/x86/single_step_syscall.c4
-rw-r--r--tools/testing/selftests/x86/syscall_arg_fault.c7
263 files changed, 16721 insertions, 2845 deletions
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index ff4d327a582e..88b28aa7431f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -12,7 +12,8 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **btf** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | {**-d** | **--debug** } |
+ { **-B** | **--base-btf** } }
*COMMANDS* := { **dump** | **help** }
@@ -73,6 +74,20 @@ OPTIONS
=======
.. include:: common_options.rst
+ -B, --base-btf *FILE*
+ Pass a base BTF object. Base BTF objects are typically used
+ with BTF objects for kernel modules. To avoid duplicating
+ all kernel symbols required by modules, BTF objects for
+ modules are "split", they are built incrementally on top of
+ the kernel (vmlinux) BTF object. So the base BTF reference
+ should usually point to the kernel BTF.
+
+ When the main BTF object to process (for example, the
+ module BTF to dump) is passed as a *FILE*, bpftool attempts
+ to autodetect the path for the base object, and passing
+ this option is optional. When the main BTF object is passed
+ through other handles, this option becomes necessary.
+
EXAMPLES
========
**# bpftool btf dump id 1226**
@@ -217,3 +232,34 @@ All the standard ways to specify map or program are supported:
**# bpftool btf dump prog tag b88e0a09b1d9759d**
**# bpftool btf dump prog pinned /sys/fs/bpf/prog_name**
+
+|
+| **# bpftool btf dump file /sys/kernel/btf/i2c_smbus**
+| (or)
+| **# I2C_SMBUS_ID=$(bpftool btf show -p | jq '.[] | select(.name=="i2c_smbus").id')**
+| **# bpftool btf dump id ${I2C_SMBUS_ID} -B /sys/kernel/btf/vmlinux**
+
+::
+
+ [104848] STRUCT 'i2c_smbus_alert' size=40 vlen=2
+ 'alert' type_id=393 bits_offset=0
+ 'ara' type_id=56050 bits_offset=256
+ [104849] STRUCT 'alert_data' size=12 vlen=3
+ 'addr' type_id=16 bits_offset=0
+ 'type' type_id=56053 bits_offset=32
+ 'data' type_id=7 bits_offset=64
+ [104850] PTR '(anon)' type_id=104848
+ [104851] PTR '(anon)' type_id=104849
+ [104852] FUNC 'i2c_register_spd' type_id=84745 linkage=static
+ [104853] FUNC 'smbalert_driver_init' type_id=1213 linkage=static
+ [104854] FUNC_PROTO '(anon)' ret_type_id=18 vlen=1
+ 'ara' type_id=56050
+ [104855] FUNC 'i2c_handle_smbus_alert' type_id=104854 linkage=static
+ [104856] FUNC 'smbalert_remove' type_id=104854 linkage=static
+ [104857] FUNC_PROTO '(anon)' ret_type_id=18 vlen=2
+ 'ara' type_id=56050
+ 'id' type_id=56056
+ [104858] FUNC 'smbalert_probe' type_id=104857 linkage=static
+ [104859] FUNC 'smbalert_work' type_id=9695 linkage=static
+ [104860] FUNC 'smbus_alert' type_id=71367 linkage=static
+ [104861] FUNC 'smbus_do_alert' type_id=84827 linkage=static
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index baee8591ac76..3e4395eede4f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -12,7 +12,8 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **cgroup** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-f** | **--bpffs** } }
*COMMANDS* :=
{ **show** | **list** | **tree** | **attach** | **detach** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index dd3771bdbc57..ab9f57ee4c3a 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -12,7 +12,7 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **feature** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
*COMMANDS* := { **probe** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index 7cd6681137f3..2ef2f2df0279 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -12,7 +12,8 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **gen** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-L** | **--use-loader** } }
*COMMAND* := { **object** | **skeleton** | **help** }
@@ -152,6 +153,12 @@ OPTIONS
=======
.. include:: common_options.rst
+ -L, --use-loader
+ For skeletons, generate a "light" skeleton (also known as "loader"
+ skeleton). A light skeleton contains a loader eBPF program. It does
+ not use the majority of the libbpf infrastructure, and does not need
+ libelf.
+
EXAMPLES
========
**$ cat example1.bpf.c**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
index 51f49bead619..471f363a725a 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
@@ -12,6 +12,8 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **iter** *COMMAND*
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
+
*COMMANDS* := { **pin** | **help** }
ITER COMMANDS
diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
index 5f7db2a837cc..0de90f086238 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -12,7 +12,8 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **link** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
*COMMANDS* := { **show** | **list** | **pin** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 3d52256ba75f..d0c4abe08aba 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -12,7 +12,8 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **map** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-f** | **--bpffs** } | { **-n** | **--nomount** } }
*COMMANDS* :=
{ **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index d8165d530937..1ae0375e8fea 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -12,7 +12,7 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **net** *COMMAND*
- *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
*COMMANDS* :=
{ **show** | **list** | **attach** | **detach** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
index e958ce91de72..ce52798a917d 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -12,7 +12,7 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **perf** *COMMAND*
- *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
*COMMANDS* :=
{ **show** | **list** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index fe1b38e7e887..91608cb7e44a 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -12,7 +12,9 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **prog** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } |
+ { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } |
+ { **-L** | **--use-loader** } }
*COMMANDS* :=
{ **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load**
@@ -48,10 +50,11 @@ PROG COMMANDS
| **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup**
| }
| *ATTACH_TYPE* := {
-| **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
+| **msg_verdict** | **skb_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
| }
| *METRICs* := {
-| **cycles** | **instructions** | **l1d_loads** | **llc_misses**
+| **cycles** | **instructions** | **l1d_loads** | **llc_misses** |
+| **itlb_misses** | **dtlb_misses**
| }
@@ -223,6 +226,20 @@ OPTIONS
Do not automatically attempt to mount any virtual file system
(such as tracefs or BPF virtual file system) when necessary.
+ -L, --use-loader
+ Load program as a "loader" program. This is useful to debug
+ the generation of such programs. When this option is in
+ use, bpftool attempts to load the programs from the object
+ file into the kernel, but does not pin them (therefore, the
+ *PATH* must not be provided).
+
+ When combined with the **-d**\ \|\ **--debug** option,
+ additional debug messages are generated, and the execution
+ of the loader program will use the **bpf_trace_printk**\ ()
+ helper to log each step of loading BTF, creating the maps,
+ and loading the programs (see **bpftool prog tracelog** as
+ a way to dump those messages).
+
EXAMPLES
========
**# bpftool prog show**
@@ -326,3 +343,16 @@ EXAMPLES
40176203 cycles (83.05%)
42518139 instructions # 1.06 insns per cycle (83.39%)
123 llc_misses # 2.89 LLC misses per million insns (83.15%)
+
+|
+| Output below is for the trace logs.
+| Run in separate terminals:
+| **# bpftool prog tracelog**
+| **# bpftool prog load -L -d file.o**
+
+::
+
+ bpftool-620059 [004] d... 2634685.517903: bpf_trace_printk: btf_load size 665 r=5
+ bpftool-620059 [004] d... 2634685.517912: bpf_trace_printk: map_create sample_map idx 0 type 2 value_size 4 value_btf_id 0 r=6
+ bpftool-620059 [004] d... 2634685.517997: bpf_trace_printk: prog_load sample insn_cnt 13 r=7
+ bpftool-620059 [004] d... 2634685.517999: bpf_trace_printk: close(5) = 0
diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
index 506e70ee78e9..02afc0fc14cb 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
@@ -12,7 +12,7 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **struct_ops** *COMMAND*
- *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
*COMMANDS* :=
{ **show** | **list** | **dump** | **register** | **unregister** | **help** }
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index e7d949334961..bb23f55bb05a 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -18,15 +18,15 @@ SYNOPSIS
*OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** }
- *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
- | { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+ *OPTIONS* := { { **-V** | **--version** } |
+ { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } }
*MAP-COMMANDS* :=
- { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext**
- | **delete** | **pin** | **event_pipe** | **help** }
+ { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** |
+ **delete** | **pin** | **event_pipe** | **help** }
- *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
- | **load** | **attach** | **detach** | **help** }
+ *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** |
+ **load** | **attach** | **detach** | **help** }
*CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index cc33c5824a2f..88e2bcf16cca 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -260,7 +260,8 @@ _bpftool()
# Deal with options
if [[ ${words[cword]} == -* ]]; then
- local c='--version --json --pretty --bpffs --mapcompat --debug'
+ local c='--version --json --pretty --bpffs --mapcompat --debug \
+ --use-loader --base-btf'
COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
return 0
fi
@@ -278,7 +279,7 @@ _bpftool()
_sysfs_get_netdevs
return 0
;;
- file|pinned)
+ file|pinned|-B|--base-btf)
_filedir
return 0
;;
@@ -291,7 +292,8 @@ _bpftool()
# Remove all options so completions don't have to deal with them.
local i
for (( i=1; i < ${#words[@]}; )); do
- if [[ ${words[i]::1} == - ]]; then
+ if [[ ${words[i]::1} == - ]] &&
+ [[ ${words[i]} != "-B" ]] && [[ ${words[i]} != "--base-btf" ]]; then
words=( "${words[@]:0:i}" "${words[@]:i+1}" )
[[ $i -le $cword ]] && cword=$(( cword - 1 ))
else
@@ -343,7 +345,8 @@ _bpftool()
local PROG_TYPE='id pinned tag name'
local MAP_TYPE='id pinned name'
- local METRIC_TYPE='cycles instructions l1d_loads llc_misses'
+ local METRIC_TYPE='cycles instructions l1d_loads llc_misses \
+ itlb_misses dtlb_misses'
case $command in
show|list)
[[ $prev != "$command" ]] && return 0
@@ -404,8 +407,10 @@ _bpftool()
return 0
;;
5)
- COMPREPLY=( $( compgen -W 'msg_verdict stream_verdict \
- stream_parser flow_dissector' -- "$cur" ) )
+ local BPFTOOL_PROG_ATTACH_TYPES='msg_verdict \
+ skb_verdict stream_verdict stream_parser \
+ flow_dissector'
+ COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_ATTACH_TYPES" -- "$cur" ) )
return 0
;;
6)
@@ -464,7 +469,7 @@ _bpftool()
case $prev in
type)
- COMPREPLY=( $( compgen -W "socket kprobe \
+ local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \
kretprobe classifier flow_dissector \
action tracepoint raw_tracepoint \
xdp perf_event cgroup/skb cgroup/sock \
@@ -479,8 +484,8 @@ _bpftool()
cgroup/post_bind4 cgroup/post_bind6 \
cgroup/sysctl cgroup/getsockopt \
cgroup/setsockopt cgroup/sock_release struct_ops \
- fentry fexit freplace sk_lookup" -- \
- "$cur" ) )
+ fentry fexit freplace sk_lookup'
+ COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_LOAD_TYPES" -- "$cur" ) )
return 0
;;
id)
@@ -698,15 +703,15 @@ _bpftool()
return 0
;;
type)
- COMPREPLY=( $( compgen -W 'hash array prog_array \
- perf_event_array percpu_hash percpu_array \
- stack_trace cgroup_array lru_hash \
+ local BPFTOOL_MAP_CREATE_TYPES='hash array \
+ prog_array perf_event_array percpu_hash \
+ percpu_array stack_trace cgroup_array lru_hash \
lru_percpu_hash lpm_trie array_of_maps \
hash_of_maps devmap devmap_hash sockmap cpumap \
xskmap sockhash cgroup_storage reuseport_sockarray \
percpu_cgroup_storage queue stack sk_storage \
- struct_ops inode_storage task_storage' -- \
- "$cur" ) )
+ struct_ops inode_storage task_storage ringbuf'
+ COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) )
return 0
;;
key|value|flags|entries)
@@ -1017,34 +1022,37 @@ _bpftool()
return 0
;;
attach|detach)
- local ATTACH_TYPES='ingress egress sock_create sock_ops \
- device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
+ local BPFTOOL_CGROUP_ATTACH_TYPES='ingress egress \
+ sock_create sock_ops device \
+ bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
getpeername4 getpeername6 getsockname4 getsockname6 \
sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
setsockopt sock_release'
local ATTACH_FLAGS='multi override'
local PROG_TYPE='id pinned tag name'
- case $prev in
- $command)
- _filedir
- return 0
- ;;
- ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
- post_bind4|post_bind6|connect4|connect6|getpeername4|\
- getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\
- recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release)
+ # Check for $prev = $command first
+ if [ $prev = $command ]; then
+ _filedir
+ return 0
+ # Then check for attach type. This is done outside of the
+ # "case $prev in" to avoid writing the whole list of attach
+ # types again as pattern to match (where we cannot reuse
+ # our variable).
+ elif [[ $BPFTOOL_CGROUP_ATTACH_TYPES =~ $prev ]]; then
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
"$cur" ) )
return 0
- ;;
+ fi
+ # case/esac for the other cases
+ case $prev in
id)
_bpftool_get_prog_ids
return 0
;;
*)
- if ! _bpftool_search_list "$ATTACH_TYPES"; then
- COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- \
- "$cur" ) )
+ if ! _bpftool_search_list "$BPFTOOL_CGROUP_ATTACH_TYPES"; then
+ COMPREPLY=( $( compgen -W \
+ "$BPFTOOL_CGROUP_ATTACH_TYPES" -- "$cur" ) )
elif [[ "$command" == "attach" ]]; then
# We have an attach type on the command line,
# but it is not the previous word, or
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 385d5c955cf3..f7e5ff3586c9 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -580,16 +580,12 @@ static int do_dump(int argc, char **argv)
}
if (!btf) {
- err = btf__get_from_id(btf_id, &btf);
+ btf = btf__load_from_kernel_by_id_split(btf_id, base_btf);
+ err = libbpf_get_error(btf);
if (err) {
p_err("get btf by id (%u): %s", btf_id, strerror(err));
goto done;
}
- if (!btf) {
- err = -ENOENT;
- p_err("can't find btf with ID (%u)", btf_id);
- goto done;
- }
}
if (dump_c) {
@@ -985,7 +981,8 @@ static int do_help(int argc, char **argv)
" FORMAT := { raw | c }\n"
" " HELP_SPEC_MAP "\n"
" " HELP_SPEC_PROGRAM "\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-B|--base-btf} }\n"
"",
bin_name, "btf");
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 7ca54d046362..9c25286a5c73 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -64,8 +64,10 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
}
info = &prog_info->info;
- if (!info->btf_id || !info->nr_func_info ||
- btf__get_from_id(info->btf_id, &prog_btf))
+ if (!info->btf_id || !info->nr_func_info)
+ goto print;
+ prog_btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(prog_btf))
goto print;
finfo = u64_to_ptr(info->func_info);
func_type = btf__type_by_id(prog_btf, finfo->type_id);
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 6e53b1d393f4..3571a281c43f 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -501,7 +501,8 @@ static int do_help(int argc, char **argv)
HELP_SPEC_ATTACH_TYPES "\n"
" " HELP_SPEC_ATTACH_FLAGS "\n"
" " HELP_SPEC_PROGRAM "\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-f|--bpffs} }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index dc6daa193557..d42d930a3ec4 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -67,6 +67,12 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
[BPF_MODIFY_RETURN] = "mod_ret",
[BPF_LSM_MAC] = "lsm_mac",
[BPF_SK_LOOKUP] = "sk_lookup",
+ [BPF_TRACE_ITER] = "trace_iter",
+ [BPF_XDP_DEVMAP] = "xdp_devmap",
+ [BPF_XDP_CPUMAP] = "xdp_cpumap",
+ [BPF_XDP] = "xdp",
+ [BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select",
+ [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate",
};
void p_err(const char *fmt, ...)
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 40a88df275f9..7f36385aa9e2 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -1005,6 +1005,7 @@ static int do_help(int argc, char **argv)
" %1$s %2$s help\n"
"\n"
" COMPONENT := { kernel | dev NAME }\n"
+ " " HELP_SPEC_OPTIONS " }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 1d71ff8c52fa..d40d92bbf0e4 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -1026,7 +1026,8 @@ static int do_help(int argc, char **argv)
" %1$s %2$s skeleton FILE [name OBJECT_NAME]\n"
" %1$s %2$s help\n"
"\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-L|--use-loader} }\n"
"",
bin_name, "gen");
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
index 3b1aad7535dd..84a9b01d956d 100644
--- a/tools/bpf/bpftool/iter.c
+++ b/tools/bpf/bpftool/iter.c
@@ -97,7 +97,9 @@ static int do_help(int argc, char **argv)
fprintf(stderr,
"Usage: %1$s %2$s pin OBJ PATH [map MAP]\n"
" %1$s %2$s help\n"
+ "\n"
" " HELP_SPEC_MAP "\n"
+ " " HELP_SPEC_OPTIONS " }\n"
"",
bin_name, "iter");
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index e77e1525d20a..8cc3e36f8cc6 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -401,7 +401,8 @@ static int do_help(int argc, char **argv)
" %1$s %2$s help\n"
"\n"
" " HELP_SPEC_LINK "\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-f|--bpffs} | {-n|--nomount} }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 3ddfd4843738..02eaaf065f65 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -64,7 +64,8 @@ static int do_help(int argc, char **argv)
" %s version\n"
"\n"
" OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-V|--version} }\n"
"",
bin_name, bin_name, bin_name);
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index c1cf29798b99..90caa42aac4c 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -57,8 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr)
#define HELP_SPEC_PROGRAM \
"PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }"
#define HELP_SPEC_OPTIONS \
- "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n" \
- "\t {-m|--mapcompat} | {-n|--nomount} }"
+ "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}"
#define HELP_SPEC_MAP \
"MAP := { id MAP_ID | pinned FILE | name MAP_NAME }"
#define HELP_SPEC_LINK \
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 09ae0381205b..407071d54ab1 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -807,10 +807,11 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info)
} else if (info->btf_value_type_id) {
int err;
- err = btf__get_from_id(info->btf_id, &btf);
- if (err || !btf) {
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ err = libbpf_get_error(btf);
+ if (err) {
p_err("failed to get btf");
- btf = err ? ERR_PTR(err) : ERR_PTR(-ESRCH);
+ btf = ERR_PTR(err);
}
}
@@ -1039,11 +1040,10 @@ static void print_key_value(struct bpf_map_info *info, void *key,
void *value)
{
json_writer_t *btf_wtr;
- struct btf *btf = NULL;
- int err;
+ struct btf *btf;
- err = btf__get_from_id(info->btf_id, &btf);
- if (err) {
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(btf)) {
p_err("failed to get btf");
return;
}
@@ -1466,8 +1466,9 @@ static int do_help(int argc, char **argv)
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
" queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
- " task_storage }\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " task_storage }\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-f|--bpffs} | {-n|--nomount} }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index f836d115d7d6..649053704bd7 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -729,6 +729,7 @@ static int do_help(int argc, char **argv)
"\n"
" " HELP_SPEC_PROGRAM "\n"
" ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n"
+ " " HELP_SPEC_OPTIONS " }\n"
"\n"
"Note: Only xdp and tc attachments are supported now.\n"
" For progs attached to cgroups, use \"bpftool cgroup\"\n"
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
index ad23934819c7..50de087b0db7 100644
--- a/tools/bpf/bpftool/perf.c
+++ b/tools/bpf/bpftool/perf.c
@@ -231,7 +231,10 @@ static int do_show(int argc, char **argv)
static int do_help(int argc, char **argv)
{
fprintf(stderr,
- "Usage: %1$s %2$s { show | list | help }\n"
+ "Usage: %1$s %2$s { show | list }\n"
+ " %1$s %2$s help }\n"
+ "\n"
+ " " HELP_SPEC_OPTIONS " }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index cc48726740ad..9c3e343b7d87 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -249,10 +249,10 @@ static void show_prog_metadata(int fd, __u32 num_maps)
struct bpf_map_info map_info;
struct btf_var_secinfo *vsi;
bool printed_header = false;
- struct btf *btf = NULL;
unsigned int i, vlen;
void *value = NULL;
const char *name;
+ struct btf *btf;
int err;
if (!num_maps)
@@ -263,8 +263,8 @@ static void show_prog_metadata(int fd, __u32 num_maps)
if (!value)
return;
- err = btf__get_from_id(map_info.btf_id, &btf);
- if (err || !btf)
+ btf = btf__load_from_kernel_by_id(map_info.btf_id);
+ if (libbpf_get_error(btf))
goto out_free;
t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id);
@@ -646,9 +646,12 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
member_len = info->xlated_prog_len;
}
- if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) {
- p_err("failed to get btf");
- return -1;
+ if (info->btf_id) {
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(btf)) {
+ p_err("failed to get btf");
+ return -1;
+ }
}
func_info = u64_to_ptr(info->func_info);
@@ -781,6 +784,8 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
kernel_syms_destroy(&dd);
}
+ btf__free(btf);
+
return 0;
}
@@ -2002,8 +2007,8 @@ static char *profile_target_name(int tgt_fd)
struct bpf_prog_info_linear *info_linear;
struct bpf_func_info *func_info;
const struct btf_type *t;
+ struct btf *btf = NULL;
char *name = NULL;
- struct btf *btf;
info_linear = bpf_program__get_prog_info_linear(
tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
@@ -2012,12 +2017,17 @@ static char *profile_target_name(int tgt_fd)
return NULL;
}
- if (info_linear->info.btf_id == 0 ||
- btf__get_from_id(info_linear->info.btf_id, &btf)) {
+ if (info_linear->info.btf_id == 0) {
p_err("prog FD %d doesn't have valid btf", tgt_fd);
goto out;
}
+ btf = btf__load_from_kernel_by_id(info_linear->info.btf_id);
+ if (libbpf_get_error(btf)) {
+ p_err("failed to load btf for prog FD %d", tgt_fd);
+ goto out;
+ }
+
func_info = u64_to_ptr(info_linear->info.func_info);
t = btf__type_by_id(btf, func_info[0].type_id);
if (!t) {
@@ -2027,6 +2037,7 @@ static char *profile_target_name(int tgt_fd)
}
name = strdup(btf__name_by_offset(btf, t->name_off));
out:
+ btf__free(btf);
free(info_linear);
return name;
}
@@ -2245,10 +2256,12 @@ static int do_help(int argc, char **argv)
" cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
" cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n"
" struct_ops | fentry | fexit | freplace | sk_lookup }\n"
- " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
- " flow_dissector }\n"
+ " ATTACH_TYPE := { msg_verdict | skb_verdict | stream_verdict |\n"
+ " stream_parser | flow_dissector }\n"
" METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
- " " HELP_SPEC_OPTIONS "\n"
+ " " HELP_SPEC_OPTIONS " |\n"
+ " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n"
+ " {-L|--use-loader} }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
index b58b91f62ffb..ab2d2290569a 100644
--- a/tools/bpf/bpftool/struct_ops.c
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -572,8 +572,8 @@ static int do_help(int argc, char **argv)
" %1$s %2$s unregister STRUCT_OPS_MAP\n"
" %1$s %2$s help\n"
"\n"
- " OPTIONS := { {-j|--json} [{-p|--pretty}] }\n"
" STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n"
+ " " HELP_SPEC_OPTIONS " }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 3ad9301b0f00..de6365b53c9c 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -291,7 +291,7 @@ static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh)
sh->sh_addralign = expected;
if (gelf_update_shdr(scn, sh) == 0) {
- printf("FAILED cannot update section header: %s\n",
+ pr_err("FAILED cannot update section header: %s\n",
elf_errmsg(-1));
return -1;
}
@@ -317,6 +317,7 @@ static int elf_collect(struct object *obj)
elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL);
if (!elf) {
+ close(fd);
pr_err("FAILED cannot create ELF descriptor: %s\n",
elf_errmsg(-1));
return -1;
@@ -484,7 +485,7 @@ static int symbols_resolve(struct object *obj)
err = libbpf_get_error(btf);
if (err) {
pr_err("FAILED: load BTF from %s: %s\n",
- obj->path, strerror(-err));
+ obj->btf ?: obj->path, strerror(-err));
return -1;
}
@@ -555,8 +556,7 @@ static int id_patch(struct object *obj, struct btf_id *id)
int i;
if (!id->id) {
- pr_err("FAILED unresolved symbol %s\n", id->name);
- return -EINVAL;
+ pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name);
}
for (i = 0; i < id->addr_cnt; i++) {
@@ -734,8 +734,9 @@ int main(int argc, const char **argv)
err = 0;
out:
- if (obj.efile.elf)
+ if (obj.efile.elf) {
elf_end(obj.efile.elf);
- close(obj.efile.fd);
+ close(obj.efile.fd);
+ }
return err;
}
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 8b7a9830dd22..3430667b0d24 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -1031,7 +1031,7 @@ struct sys_stat_struct {
* scall32-o32.S in the kernel sources.
* - the system call is performed by calling "syscall"
* - syscall return comes in v0, and register a3 needs to be checked to know
- * if an error occured, in which case errno is in v0.
+ * if an error occurred, in which case errno is in v0.
* - the arguments are cast to long and assigned into the target registers
* which are then simply passed as registers to the asm code, so that we
* don't have to experience issues with register constraints.
@@ -2244,6 +2244,19 @@ unsigned int sleep(unsigned int seconds)
}
static __attribute__((unused))
+int msleep(unsigned int msecs)
+{
+ struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 };
+
+ if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+ return (my_timeval.tv_sec * 1000) +
+ (my_timeval.tv_usec / 1000) +
+ !!(my_timeval.tv_usec % 1000);
+ else
+ return 0;
+}
+
+static __attribute__((unused))
int stat(const char *path, struct stat *buf)
{
int ret = sys_stat(path, buf);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index bf9252c7381e..791f31dd0abe 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -84,7 +84,7 @@ struct bpf_lpm_trie_key {
struct bpf_cgroup_storage_key {
__u64 cgroup_inode_id; /* cgroup inode id */
- __u32 attach_type; /* program attach type */
+ __u32 attach_type; /* program attach type (enum bpf_attach_type) */
};
union bpf_iter_link_info {
@@ -324,9 +324,6 @@ union bpf_iter_link_info {
* **BPF_PROG_TYPE_SK_LOOKUP**
* *data_in* and *data_out* must be NULL.
*
- * **BPF_PROG_TYPE_XDP**
- * *ctx_in* and *ctx_out* must be NULL.
- *
* **BPF_PROG_TYPE_RAW_TRACEPOINT**,
* **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
*
@@ -996,6 +993,7 @@ enum bpf_attach_type {
BPF_SK_SKB_VERDICT,
BPF_SK_REUSEPORT_SELECT,
BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
+ BPF_PERF_EVENT,
__MAX_BPF_ATTACH_TYPE
};
@@ -1009,6 +1007,7 @@ enum bpf_link_type {
BPF_LINK_TYPE_ITER = 4,
BPF_LINK_TYPE_NETNS = 5,
BPF_LINK_TYPE_XDP = 6,
+ BPF_LINK_TYPE_PERF_EVENT = 7,
MAX_BPF_LINK_TYPE,
};
@@ -1449,6 +1448,13 @@ union bpf_attr {
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
__u32 iter_info_len; /* iter_info length */
};
+ struct {
+ /* black box user-provided value passed through
+ * to BPF program at the execution time and
+ * accessible through bpf_get_attach_cookie() BPF helper
+ */
+ __u64 bpf_cookie;
+ } perf_event;
};
} link_create;
@@ -3249,7 +3255,7 @@ union bpf_attr {
* long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
* Description
* Select a **SO_REUSEPORT** socket from a
- * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
+ * **BPF_MAP_TYPE_REUSEPORT_SOCKARRAY** *map*.
* It checks the selected socket is matching the incoming
* request in the socket buffer.
* Return
@@ -4780,6 +4786,97 @@ union bpf_attr {
* Execute close syscall for given FD.
* Return
* A syscall result.
+ *
+ * long bpf_timer_init(struct bpf_timer *timer, struct bpf_map *map, u64 flags)
+ * Description
+ * Initialize the timer.
+ * First 4 bits of *flags* specify clockid.
+ * Only CLOCK_MONOTONIC, CLOCK_REALTIME, CLOCK_BOOTTIME are allowed.
+ * All other bits of *flags* are reserved.
+ * The verifier will reject the program if *timer* is not from
+ * the same *map*.
+ * Return
+ * 0 on success.
+ * **-EBUSY** if *timer* is already initialized.
+ * **-EINVAL** if invalid *flags* are passed.
+ * **-EPERM** if *timer* is in a map that doesn't have any user references.
+ * The user space should either hold a file descriptor to a map with timers
+ * or pin such map in bpffs. When map is unpinned or file descriptor is
+ * closed all timers in the map will be cancelled and freed.
+ *
+ * long bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn)
+ * Description
+ * Configure the timer to call *callback_fn* static function.
+ * Return
+ * 0 on success.
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
+ * **-EPERM** if *timer* is in a map that doesn't have any user references.
+ * The user space should either hold a file descriptor to a map with timers
+ * or pin such map in bpffs. When map is unpinned or file descriptor is
+ * closed all timers in the map will be cancelled and freed.
+ *
+ * long bpf_timer_start(struct bpf_timer *timer, u64 nsecs, u64 flags)
+ * Description
+ * Set timer expiration N nanoseconds from the current time. The
+ * configured callback will be invoked in soft irq context on some cpu
+ * and will not repeat unless another bpf_timer_start() is made.
+ * In such case the next invocation can migrate to a different cpu.
+ * Since struct bpf_timer is a field inside map element the map
+ * owns the timer. The bpf_timer_set_callback() will increment refcnt
+ * of BPF program to make sure that callback_fn code stays valid.
+ * When user space reference to a map reaches zero all timers
+ * in a map are cancelled and corresponding program's refcnts are
+ * decremented. This is done to make sure that Ctrl-C of a user
+ * process doesn't leave any timers running. If map is pinned in
+ * bpffs the callback_fn can re-arm itself indefinitely.
+ * bpf_map_update/delete_elem() helpers and user space sys_bpf commands
+ * cancel and free the timer in the given map element.
+ * The map can contain timers that invoke callback_fn-s from different
+ * programs. The same callback_fn can serve different timers from
+ * different maps if key/value layout matches across maps.
+ * Every bpf_timer_set_callback() can have different callback_fn.
+ *
+ * Return
+ * 0 on success.
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier
+ * or invalid *flags* are passed.
+ *
+ * long bpf_timer_cancel(struct bpf_timer *timer)
+ * Description
+ * Cancel the timer and wait for callback_fn to finish if it was running.
+ * Return
+ * 0 if the timer was not active.
+ * 1 if the timer was active.
+ * **-EINVAL** if *timer* was not initialized with bpf_timer_init() earlier.
+ * **-EDEADLK** if callback_fn tried to call bpf_timer_cancel() on its
+ * own timer which would have led to a deadlock otherwise.
+ *
+ * u64 bpf_get_func_ip(void *ctx)
+ * Description
+ * Get address of the traced function (for tracing and kprobe programs).
+ * Return
+ * Address of the traced function.
+ *
+ * u64 bpf_get_attach_cookie(void *ctx)
+ * Description
+ * Get bpf_cookie value provided (optionally) during the program
+ * attachment. It might be different for each individual
+ * attachment, even if BPF program itself is the same.
+ * Expects BPF program context *ctx* as a first argument.
+ *
+ * Supported for the following program types:
+ * - kprobe/uprobe;
+ * - tracepoint;
+ * - perf_event.
+ * Return
+ * Value specified by user at BPF link creation/attachment time
+ * or 0, if it was not specified.
+ *
+ * long bpf_task_pt_regs(struct task_struct *task)
+ * Description
+ * Get the struct pt_regs associated with **task**.
+ * Return
+ * A pointer to struct pt_regs.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4951,6 +5048,13 @@ union bpf_attr {
FN(sys_bpf), \
FN(btf_find_by_name_kind), \
FN(sys_close), \
+ FN(timer_init), \
+ FN(timer_set_callback), \
+ FN(timer_start), \
+ FN(timer_cancel), \
+ FN(get_func_ip), \
+ FN(get_attach_cookie), \
+ FN(task_pt_regs), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6077,6 +6181,11 @@ struct bpf_spin_lock {
__u32 val;
};
+struct bpf_timer {
+ __u64 :64;
+ __u64 :64;
+} __attribute__((aligned(8)));
+
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.
diff --git a/tools/include/uapi/linux/ethtool.h b/tools/include/uapi/linux/ethtool.h
index c86c3e942df9..47afae3895ec 100644
--- a/tools/include/uapi/linux/ethtool.h
+++ b/tools/include/uapi/linux/ethtool.h
@@ -48,4 +48,57 @@ struct ethtool_channels {
__u32 combined_count;
};
+#define ETHTOOL_FWVERS_LEN 32
+#define ETHTOOL_BUSINFO_LEN 32
+#define ETHTOOL_EROMVERS_LEN 32
+
+/**
+ * struct ethtool_drvinfo - general driver and device information
+ * @cmd: Command number = %ETHTOOL_GDRVINFO
+ * @driver: Driver short name. This should normally match the name
+ * in its bus driver structure (e.g. pci_driver::name). Must
+ * not be an empty string.
+ * @version: Driver version string; may be an empty string
+ * @fw_version: Firmware version string; may be an empty string
+ * @erom_version: Expansion ROM version string; may be an empty string
+ * @bus_info: Device bus address. This should match the dev_name()
+ * string for the underlying bus device, if there is one. May be
+ * an empty string.
+ * @reserved2: Reserved for future use; see the note on reserved space.
+ * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and
+ * %ETHTOOL_SPFLAGS commands; also the number of strings in the
+ * %ETH_SS_PRIV_FLAGS set
+ * @n_stats: Number of u64 statistics returned by the %ETHTOOL_GSTATS
+ * command; also the number of strings in the %ETH_SS_STATS set
+ * @testinfo_len: Number of results returned by the %ETHTOOL_TEST
+ * command; also the number of strings in the %ETH_SS_TEST set
+ * @eedump_len: Size of EEPROM accessible through the %ETHTOOL_GEEPROM
+ * and %ETHTOOL_SEEPROM commands, in bytes
+ * @regdump_len: Size of register dump returned by the %ETHTOOL_GREGS
+ * command, in bytes
+ *
+ * Users can use the %ETHTOOL_GSSET_INFO command to get the number of
+ * strings in any string set (from Linux 2.6.34).
+ *
+ * Drivers should set at most @driver, @version, @fw_version and
+ * @bus_info in their get_drvinfo() implementation. The ethtool
+ * core fills in the other fields using other driver operations.
+ */
+struct ethtool_drvinfo {
+ __u32 cmd;
+ char driver[32];
+ char version[32];
+ char fw_version[ETHTOOL_FWVERS_LEN];
+ char bus_info[ETHTOOL_BUSINFO_LEN];
+ char erom_version[ETHTOOL_EROMVERS_LEN];
+ char reserved2[12];
+ __u32 n_priv_flags;
+ __u32 n_stats;
+ __u32 testinfo_len;
+ __u32 eedump_len;
+ __u32 regdump_len;
+};
+
+#define ETHTOOL_GDRVINFO 0x00000003
+
#endif /* _UAPI_LINUX_ETHTOOL_H */
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index d208b2af697f..b3610fdd1fee 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -230,6 +230,7 @@ enum {
IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */
IFLA_INET6_TOKEN, /* device token */
IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
+ IFLA_INET6_RA_MTU, /* mtu carried in the RA message */
__IFLA_INET6_MAX
};
@@ -653,6 +654,7 @@ enum {
IFLA_BOND_AD_ACTOR_SYSTEM,
IFLA_BOND_TLB_DYNAMIC_LB,
IFLA_BOND_PEER_NOTIF_DELAY,
+ IFLA_BOND_AD_LACP_ACTIVE,
__IFLA_BOND_MAX,
};
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 430f6874fa41..94f0a146bb7b 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,3 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
- btf_dump.o ringbuf.o strset.o linker.o gen_loader.o
+ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index ec14aa725bb0..74c3b73a5fbe 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -4,8 +4,9 @@
RM ?= rm
srctree = $(abs_srctree)
+VERSION_SCRIPT := libbpf.map
LIBBPF_VERSION := $(shell \
- grep -oE '^LIBBPF_([0-9.]+)' libbpf.map | \
+ grep -oE '^LIBBPF_([0-9.]+)' $(VERSION_SCRIPT) | \
sort -rV | head -n1 | cut -d'_' -f2)
LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION)))
@@ -110,7 +111,6 @@ SHARED_OBJDIR := $(OUTPUT)sharedobjs/
STATIC_OBJDIR := $(OUTPUT)staticobjs/
BPF_IN_SHARED := $(SHARED_OBJDIR)libbpf-in.o
BPF_IN_STATIC := $(STATIC_OBJDIR)libbpf-in.o
-VERSION_SCRIPT := libbpf.map
BPF_HELPER_DEFS := $(OUTPUT)bpf_helper_defs.h
LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
@@ -163,10 +163,10 @@ $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
-$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)
+$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(VERSION_SCRIPT)
$(QUIET_LINK)$(CC) $(LDFLAGS) \
--shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
- -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -lz -o $@
+ -Wl,--version-script=$(VERSION_SCRIPT) $< -lelf -lz -o $@
@ln -sf $(@F) $(OUTPUT)libbpf.so
@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
@@ -181,7 +181,7 @@ $(OUTPUT)libbpf.pc:
check: check_abi
-check_abi: $(OUTPUT)libbpf.so
+check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT)
@if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then \
echo "Warning: Num of global symbols in $(BPF_IN_SHARED)" \
"($(GLOBAL_SYM_COUNT)) does NOT match with num of" \
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 86dcac44f32f..2401fad090c5 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -684,8 +684,13 @@ int bpf_link_create(int prog_fd, int target_fd,
iter_info_len = OPTS_GET(opts, iter_info_len, 0);
target_btf_id = OPTS_GET(opts, target_btf_id, 0);
- if (iter_info_len && target_btf_id)
- return libbpf_err(-EINVAL);
+ /* validate we don't have unexpected combinations of non-zero fields */
+ if (iter_info_len || target_btf_id) {
+ if (iter_info_len && target_btf_id)
+ return libbpf_err(-EINVAL);
+ if (!OPTS_ZEROED(opts, target_btf_id))
+ return libbpf_err(-EINVAL);
+ }
memset(&attr, 0, sizeof(attr));
attr.link_create.prog_fd = prog_fd;
@@ -693,14 +698,27 @@ int bpf_link_create(int prog_fd, int target_fd,
attr.link_create.attach_type = attach_type;
attr.link_create.flags = OPTS_GET(opts, flags, 0);
- if (iter_info_len) {
- attr.link_create.iter_info =
- ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
- attr.link_create.iter_info_len = iter_info_len;
- } else if (target_btf_id) {
+ if (target_btf_id) {
attr.link_create.target_btf_id = target_btf_id;
+ goto proceed;
}
+ switch (attach_type) {
+ case BPF_TRACE_ITER:
+ attr.link_create.iter_info = ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
+ attr.link_create.iter_info_len = iter_info_len;
+ break;
+ case BPF_PERF_EVENT:
+ attr.link_create.perf_event.bpf_cookie = OPTS_GET(opts, perf_event.bpf_cookie, 0);
+ if (!OPTS_ZEROED(opts, perf_event))
+ return libbpf_err(-EINVAL);
+ break;
+ default:
+ if (!OPTS_ZEROED(opts, flags))
+ return libbpf_err(-EINVAL);
+ break;
+ }
+proceed:
fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
return libbpf_err_errno(fd);
}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 4f758f8f50cd..6fffb3cdf39b 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -177,8 +177,14 @@ struct bpf_link_create_opts {
union bpf_iter_link_info *iter_info;
__u32 iter_info_len;
__u32 target_btf_id;
+ union {
+ struct {
+ __u64 bpf_cookie;
+ } perf_event;
+ };
+ size_t :0;
};
-#define bpf_link_create_opts__last_field target_btf_id
+#define bpf_link_create_opts__last_field perf_event
LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 7ff3d5ce44f9..77dc24d58302 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1179,7 +1179,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
-int btf__load(struct btf *btf)
+int btf__load_into_kernel(struct btf *btf)
{
__u32 log_buf_size = 0, raw_size;
char *log_buf = NULL;
@@ -1227,6 +1227,7 @@ done:
free(log_buf);
return libbpf_err(err);
}
+int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel")));
int btf__fd(const struct btf *btf)
{
@@ -1381,21 +1382,35 @@ exit_free:
return btf;
}
-int btf__get_from_id(__u32 id, struct btf **btf)
+struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf)
{
- struct btf *res;
- int err, btf_fd;
+ struct btf *btf;
+ int btf_fd;
- *btf = NULL;
btf_fd = bpf_btf_get_fd_by_id(id);
if (btf_fd < 0)
- return libbpf_err(-errno);
-
- res = btf_get_from_fd(btf_fd, NULL);
- err = libbpf_get_error(res);
+ return libbpf_err_ptr(-errno);
+ btf = btf_get_from_fd(btf_fd, base_btf);
close(btf_fd);
+ return libbpf_ptr(btf);
+}
+
+struct btf *btf__load_from_kernel_by_id(__u32 id)
+{
+ return btf__load_from_kernel_by_id_split(id, NULL);
+}
+
+int btf__get_from_id(__u32 id, struct btf **btf)
+{
+ struct btf *res;
+ int err;
+
+ *btf = NULL;
+ res = btf__load_from_kernel_by_id(id);
+ err = libbpf_get_error(res);
+
if (err)
return libbpf_err(err);
@@ -4020,7 +4035,7 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
*/
if (d->hypot_adjust_canon)
continue;
-
+
if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
d->map[t_id] = c_id;
@@ -4393,7 +4408,7 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
* Probe few well-known locations for vmlinux kernel image and try to load BTF
* data out of it to use for target BTF.
*/
-struct btf *libbpf_find_kernel_btf(void)
+struct btf *btf__load_vmlinux_btf(void)
{
struct {
const char *path_fmt;
@@ -4439,6 +4454,16 @@ struct btf *libbpf_find_kernel_btf(void)
return libbpf_err_ptr(-ESRCH);
}
+struct btf *libbpf_find_kernel_btf(void) __attribute__((alias("btf__load_vmlinux_btf")));
+
+struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf)
+{
+ char path[80];
+
+ snprintf(path, sizeof(path), "/sys/kernel/btf/%s", module_name);
+ return btf__parse_split(path, vmlinux_btf);
+}
+
int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)
{
int i, n, err;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index b54f1c3ebd57..4a711f990904 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -44,8 +44,17 @@ LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_b
LIBBPF_API struct btf *btf__parse_raw(const char *path);
LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf);
+LIBBPF_API struct btf *btf__load_vmlinux_btf(void);
+LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf);
+LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
+
+LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id);
+LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf);
+LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
+
LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
LIBBPF_API int btf__load(struct btf *btf);
+LIBBPF_API int btf__load_into_kernel(struct btf *btf);
LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
const char *type_name);
LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
@@ -66,7 +75,6 @@ LIBBPF_API void btf__set_fd(struct btf *btf, int fd);
LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset);
-LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
__u32 expected_key_size,
__u32 expected_value_size,
@@ -89,8 +97,6 @@ int btf_ext__reloc_line_info(const struct btf *btf,
LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
-LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
-
LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf,
@@ -184,6 +190,25 @@ LIBBPF_API int
btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
const struct btf_dump_emit_type_decl_opts *opts);
+
+struct btf_dump_type_data_opts {
+ /* size of this struct, for forward/backward compatibility */
+ size_t sz;
+ const char *indent_str;
+ int indent_level;
+ /* below match "show" flags for bpf_show_snprintf() */
+ bool compact; /* no newlines/indentation */
+ bool skip_names; /* skip member/type names */
+ bool emit_zeroes; /* show 0-valued fields */
+ size_t :0;
+};
+#define btf_dump_type_data_opts__last_field emit_zeroes
+
+LIBBPF_API int
+btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
+ const void *data, size_t data_sz,
+ const struct btf_dump_type_data_opts *opts);
+
/*
* A set of helpers for easier BTF types handling
*/
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 5dc6b5172bb3..e4b483f15fb9 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -10,6 +10,8 @@
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
+#include <ctype.h>
+#include <endian.h>
#include <errno.h>
#include <linux/err.h>
#include <linux/btf.h>
@@ -53,6 +55,26 @@ struct btf_dump_type_aux_state {
__u8 referenced: 1;
};
+/* indent string length; one indent string is added for each indent level */
+#define BTF_DATA_INDENT_STR_LEN 32
+
+/*
+ * Common internal data for BTF type data dump operations.
+ */
+struct btf_dump_data {
+ const void *data_end; /* end of valid data to show */
+ bool compact;
+ bool skip_names;
+ bool emit_zeroes;
+ __u8 indent_lvl; /* base indent level */
+ char indent_str[BTF_DATA_INDENT_STR_LEN];
+ /* below are used during iteration */
+ int depth;
+ bool is_array_member;
+ bool is_array_terminated;
+ bool is_array_char;
+};
+
struct btf_dump {
const struct btf *btf;
const struct btf_ext *btf_ext;
@@ -60,6 +82,7 @@ struct btf_dump {
struct btf_dump_opts opts;
int ptr_sz;
bool strip_mods;
+ bool skip_anon_defs;
int last_id;
/* per-type auxiliary state */
@@ -89,6 +112,10 @@ struct btf_dump {
* name occurrences
*/
struct hashmap *ident_names;
+ /*
+ * data for typed display; allocated if needed.
+ */
+ struct btf_dump_data *typed_dump;
};
static size_t str_hash_fn(const void *key, void *ctx)
@@ -765,11 +792,11 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
break;
case BTF_KIND_FUNC_PROTO: {
const struct btf_param *p = btf_params(t);
- __u16 vlen = btf_vlen(t);
+ __u16 n = btf_vlen(t);
int i;
btf_dump_emit_type(d, t->type, cont_id);
- for (i = 0; i < vlen; i++, p++)
+ for (i = 0; i < n; i++, p++)
btf_dump_emit_type(d, p->type, cont_id);
break;
@@ -852,8 +879,9 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d,
static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
const struct btf_type *t)
{
- btf_dump_printf(d, "%s %s",
+ btf_dump_printf(d, "%s%s%s",
btf_is_struct(t) ? "struct" : "union",
+ t->name_off ? " " : "",
btf_dump_type_name(d, id));
}
@@ -1259,7 +1287,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
case BTF_KIND_UNION:
btf_dump_emit_mods(d, decls);
/* inline anonymous struct/union */
- if (t->name_off == 0)
+ if (t->name_off == 0 && !d->skip_anon_defs)
btf_dump_emit_struct_def(d, id, t, lvl);
else
btf_dump_emit_struct_fwd(d, id, t);
@@ -1267,7 +1295,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
case BTF_KIND_ENUM:
btf_dump_emit_mods(d, decls);
/* inline anonymous enum */
- if (t->name_off == 0)
+ if (t->name_off == 0 && !d->skip_anon_defs)
btf_dump_emit_enum_def(d, id, t, lvl);
else
btf_dump_emit_enum_fwd(d, id, t);
@@ -1392,6 +1420,39 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
btf_dump_emit_name(d, fname, last_was_ptr);
}
+/* show type name as (type_name) */
+static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id,
+ bool top_level)
+{
+ const struct btf_type *t;
+
+ /* for array members, we don't bother emitting type name for each
+ * member to avoid the redundancy of
+ * .name = (char[4])[(char)'f',(char)'o',(char)'o',]
+ */
+ if (d->typed_dump->is_array_member)
+ return;
+
+ /* avoid type name specification for variable/section; it will be done
+ * for the associated variable value(s).
+ */
+ t = btf__type_by_id(d->btf, id);
+ if (btf_is_var(t) || btf_is_datasec(t))
+ return;
+
+ if (top_level)
+ btf_dump_printf(d, "(");
+
+ d->skip_anon_defs = true;
+ d->strip_mods = true;
+ btf_dump_emit_type_decl(d, id, "", 0);
+ d->strip_mods = false;
+ d->skip_anon_defs = false;
+
+ if (top_level)
+ btf_dump_printf(d, ")");
+}
+
/* return number of duplicates (occurrences) of a given name */
static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
const char *orig_name)
@@ -1442,3 +1503,803 @@ static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id)
{
return btf_dump_resolve_name(d, id, d->ident_names);
}
+
+static int btf_dump_dump_type_data(struct btf_dump *d,
+ const char *fname,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz);
+
+static const char *btf_dump_data_newline(struct btf_dump *d)
+{
+ return d->typed_dump->compact || d->typed_dump->depth == 0 ? "" : "\n";
+}
+
+static const char *btf_dump_data_delim(struct btf_dump *d)
+{
+ return d->typed_dump->depth == 0 ? "" : ",";
+}
+
+static void btf_dump_data_pfx(struct btf_dump *d)
+{
+ int i, lvl = d->typed_dump->indent_lvl + d->typed_dump->depth;
+
+ if (d->typed_dump->compact)
+ return;
+
+ for (i = 0; i < lvl; i++)
+ btf_dump_printf(d, "%s", d->typed_dump->indent_str);
+}
+
+/* A macro is used here as btf_type_value[s]() appends format specifiers
+ * to the format specifier passed in; these do the work of appending
+ * delimiters etc while the caller simply has to specify the type values
+ * in the format specifier + value(s).
+ */
+#define btf_dump_type_values(d, fmt, ...) \
+ btf_dump_printf(d, fmt "%s%s", \
+ ##__VA_ARGS__, \
+ btf_dump_data_delim(d), \
+ btf_dump_data_newline(d))
+
+static int btf_dump_unsupported_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id)
+{
+ btf_dump_printf(d, "<unsupported kind:%u>", btf_kind(t));
+ return -ENOTSUP;
+}
+
+static int btf_dump_get_bitfield_value(struct btf_dump *d,
+ const struct btf_type *t,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz,
+ __u64 *value)
+{
+ __u16 left_shift_bits, right_shift_bits;
+ __u8 nr_copy_bits, nr_copy_bytes;
+ const __u8 *bytes = data;
+ int sz = t->size;
+ __u64 num = 0;
+ int i;
+
+ /* Maximum supported bitfield size is 64 bits */
+ if (sz > 8) {
+ pr_warn("unexpected bitfield size %d\n", sz);
+ return -EINVAL;
+ }
+
+ /* Bitfield value retrieval is done in two steps; first relevant bytes are
+ * stored in num, then we left/right shift num to eliminate irrelevant bits.
+ */
+ nr_copy_bits = bit_sz + bits_offset;
+ nr_copy_bytes = t->size;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ for (i = nr_copy_bytes - 1; i >= 0; i--)
+ num = num * 256 + bytes[i];
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ for (i = 0; i < nr_copy_bytes; i++)
+ num = num * 256 + bytes[i];
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+ left_shift_bits = 64 - nr_copy_bits;
+ right_shift_bits = 64 - bit_sz;
+
+ *value = (num << left_shift_bits) >> right_shift_bits;
+
+ return 0;
+}
+
+static int btf_dump_bitfield_check_zero(struct btf_dump *d,
+ const struct btf_type *t,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz)
+{
+ __u64 check_num;
+ int err;
+
+ err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &check_num);
+ if (err)
+ return err;
+ if (check_num == 0)
+ return -ENODATA;
+ return 0;
+}
+
+static int btf_dump_bitfield_data(struct btf_dump *d,
+ const struct btf_type *t,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz)
+{
+ __u64 print_num;
+ int err;
+
+ err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &print_num);
+ if (err)
+ return err;
+
+ btf_dump_type_values(d, "0x%llx", (unsigned long long)print_num);
+
+ return 0;
+}
+
+/* ints, floats and ptrs */
+static int btf_dump_base_type_check_zero(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ static __u8 bytecmp[16] = {};
+ int nr_bytes;
+
+ /* For pointer types, pointer size is not defined on a per-type basis.
+ * On dump creation however, we store the pointer size.
+ */
+ if (btf_kind(t) == BTF_KIND_PTR)
+ nr_bytes = d->ptr_sz;
+ else
+ nr_bytes = t->size;
+
+ if (nr_bytes < 1 || nr_bytes > 16) {
+ pr_warn("unexpected size %d for id [%u]\n", nr_bytes, id);
+ return -EINVAL;
+ }
+
+ if (memcmp(data, bytecmp, nr_bytes) == 0)
+ return -ENODATA;
+ return 0;
+}
+
+static bool ptr_is_aligned(const void *data, int data_sz)
+{
+ return ((uintptr_t)data) % data_sz == 0;
+}
+
+static int btf_dump_int_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 type_id,
+ const void *data,
+ __u8 bits_offset)
+{
+ __u8 encoding = btf_int_encoding(t);
+ bool sign = encoding & BTF_INT_SIGNED;
+ int sz = t->size;
+
+ if (sz == 0) {
+ pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
+ return -EINVAL;
+ }
+
+ /* handle packed int data - accesses of integers not aligned on
+ * int boundaries can cause problems on some platforms.
+ */
+ if (!ptr_is_aligned(data, sz))
+ return btf_dump_bitfield_data(d, t, data, 0, 0);
+
+ switch (sz) {
+ case 16: {
+ const __u64 *ints = data;
+ __u64 lsi, msi;
+
+ /* avoid use of __int128 as some 32-bit platforms do not
+ * support it.
+ */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ lsi = ints[0];
+ msi = ints[1];
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ lsi = ints[1];
+ msi = ints[0];
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+ if (msi == 0)
+ btf_dump_type_values(d, "0x%llx", (unsigned long long)lsi);
+ else
+ btf_dump_type_values(d, "0x%llx%016llx", (unsigned long long)msi,
+ (unsigned long long)lsi);
+ break;
+ }
+ case 8:
+ if (sign)
+ btf_dump_type_values(d, "%lld", *(long long *)data);
+ else
+ btf_dump_type_values(d, "%llu", *(unsigned long long *)data);
+ break;
+ case 4:
+ if (sign)
+ btf_dump_type_values(d, "%d", *(__s32 *)data);
+ else
+ btf_dump_type_values(d, "%u", *(__u32 *)data);
+ break;
+ case 2:
+ if (sign)
+ btf_dump_type_values(d, "%d", *(__s16 *)data);
+ else
+ btf_dump_type_values(d, "%u", *(__u16 *)data);
+ break;
+ case 1:
+ if (d->typed_dump->is_array_char) {
+ /* check for null terminator */
+ if (d->typed_dump->is_array_terminated)
+ break;
+ if (*(char *)data == '\0') {
+ d->typed_dump->is_array_terminated = true;
+ break;
+ }
+ if (isprint(*(char *)data)) {
+ btf_dump_type_values(d, "'%c'", *(char *)data);
+ break;
+ }
+ }
+ if (sign)
+ btf_dump_type_values(d, "%d", *(__s8 *)data);
+ else
+ btf_dump_type_values(d, "%u", *(__u8 *)data);
+ break;
+ default:
+ pr_warn("unexpected sz %d for id [%u]\n", sz, type_id);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+union float_data {
+ long double ld;
+ double d;
+ float f;
+};
+
+static int btf_dump_float_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 type_id,
+ const void *data)
+{
+ const union float_data *flp = data;
+ union float_data fl;
+ int sz = t->size;
+
+ /* handle unaligned data; copy to local union */
+ if (!ptr_is_aligned(data, sz)) {
+ memcpy(&fl, data, sz);
+ flp = &fl;
+ }
+
+ switch (sz) {
+ case 16:
+ btf_dump_type_values(d, "%Lf", flp->ld);
+ break;
+ case 8:
+ btf_dump_type_values(d, "%lf", flp->d);
+ break;
+ case 4:
+ btf_dump_type_values(d, "%f", flp->f);
+ break;
+ default:
+ pr_warn("unexpected size %d for id [%u]\n", sz, type_id);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int btf_dump_var_data(struct btf_dump *d,
+ const struct btf_type *v,
+ __u32 id,
+ const void *data)
+{
+ enum btf_func_linkage linkage = btf_var(v)->linkage;
+ const struct btf_type *t;
+ const char *l;
+ __u32 type_id;
+
+ switch (linkage) {
+ case BTF_FUNC_STATIC:
+ l = "static ";
+ break;
+ case BTF_FUNC_EXTERN:
+ l = "extern ";
+ break;
+ case BTF_FUNC_GLOBAL:
+ default:
+ l = "";
+ break;
+ }
+
+ /* format of output here is [linkage] [type] [varname] = (type)value,
+ * for example "static int cpu_profile_flip = (int)1"
+ */
+ btf_dump_printf(d, "%s", l);
+ type_id = v->type;
+ t = btf__type_by_id(d->btf, type_id);
+ btf_dump_emit_type_cast(d, type_id, false);
+ btf_dump_printf(d, " %s = ", btf_name_of(d, v->name_off));
+ return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0);
+}
+
+static int btf_dump_array_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ const struct btf_array *array = btf_array(t);
+ const struct btf_type *elem_type;
+ __u32 i, elem_size = 0, elem_type_id;
+ bool is_array_member;
+
+ elem_type_id = array->type;
+ elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
+ elem_size = btf__resolve_size(d->btf, elem_type_id);
+ if (elem_size <= 0) {
+ pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id);
+ return -EINVAL;
+ }
+
+ if (btf_is_int(elem_type)) {
+ /*
+ * BTF_INT_CHAR encoding never seems to be set for
+ * char arrays, so if size is 1 and element is
+ * printable as a char, we'll do that.
+ */
+ if (elem_size == 1)
+ d->typed_dump->is_array_char = true;
+ }
+
+ /* note that we increment depth before calling btf_dump_print() below;
+ * this is intentional. btf_dump_data_newline() will not print a
+ * newline for depth 0 (since this leaves us with trailing newlines
+ * at the end of typed display), so depth is incremented first.
+ * For similar reasons, we decrement depth before showing the closing
+ * parenthesis.
+ */
+ d->typed_dump->depth++;
+ btf_dump_printf(d, "[%s", btf_dump_data_newline(d));
+
+ /* may be a multidimensional array, so store current "is array member"
+ * status so we can restore it correctly later.
+ */
+ is_array_member = d->typed_dump->is_array_member;
+ d->typed_dump->is_array_member = true;
+ for (i = 0; i < array->nelems; i++, data += elem_size) {
+ if (d->typed_dump->is_array_terminated)
+ break;
+ btf_dump_dump_type_data(d, NULL, elem_type, elem_type_id, data, 0, 0);
+ }
+ d->typed_dump->is_array_member = is_array_member;
+ d->typed_dump->depth--;
+ btf_dump_data_pfx(d);
+ btf_dump_type_values(d, "]");
+
+ return 0;
+}
+
+static int btf_dump_struct_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ const struct btf_member *m = btf_members(t);
+ __u16 n = btf_vlen(t);
+ int i, err;
+
+ /* note that we increment depth before calling btf_dump_print() below;
+ * this is intentional. btf_dump_data_newline() will not print a
+ * newline for depth 0 (since this leaves us with trailing newlines
+ * at the end of typed display), so depth is incremented first.
+ * For similar reasons, we decrement depth before showing the closing
+ * parenthesis.
+ */
+ d->typed_dump->depth++;
+ btf_dump_printf(d, "{%s", btf_dump_data_newline(d));
+
+ for (i = 0; i < n; i++, m++) {
+ const struct btf_type *mtype;
+ const char *mname;
+ __u32 moffset;
+ __u8 bit_sz;
+
+ mtype = btf__type_by_id(d->btf, m->type);
+ mname = btf_name_of(d, m->name_off);
+ moffset = btf_member_bit_offset(t, i);
+
+ bit_sz = btf_member_bitfield_size(t, i);
+ err = btf_dump_dump_type_data(d, mname, mtype, m->type, data + moffset / 8,
+ moffset % 8, bit_sz);
+ if (err < 0)
+ return err;
+ }
+ d->typed_dump->depth--;
+ btf_dump_data_pfx(d);
+ btf_dump_type_values(d, "}");
+ return err;
+}
+
+union ptr_data {
+ unsigned int p;
+ unsigned long long lp;
+};
+
+static int btf_dump_ptr_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ if (ptr_is_aligned(data, d->ptr_sz) && d->ptr_sz == sizeof(void *)) {
+ btf_dump_type_values(d, "%p", *(void **)data);
+ } else {
+ union ptr_data pt;
+
+ memcpy(&pt, data, d->ptr_sz);
+ if (d->ptr_sz == 4)
+ btf_dump_type_values(d, "0x%x", pt.p);
+ else
+ btf_dump_type_values(d, "0x%llx", pt.lp);
+ }
+ return 0;
+}
+
+static int btf_dump_get_enum_value(struct btf_dump *d,
+ const struct btf_type *t,
+ const void *data,
+ __u32 id,
+ __s64 *value)
+{
+ int sz = t->size;
+
+ /* handle unaligned enum value */
+ if (!ptr_is_aligned(data, sz)) {
+ __u64 val;
+ int err;
+
+ err = btf_dump_get_bitfield_value(d, t, data, 0, 0, &val);
+ if (err)
+ return err;
+ *value = (__s64)val;
+ return 0;
+ }
+
+ switch (t->size) {
+ case 8:
+ *value = *(__s64 *)data;
+ return 0;
+ case 4:
+ *value = *(__s32 *)data;
+ return 0;
+ case 2:
+ *value = *(__s16 *)data;
+ return 0;
+ case 1:
+ *value = *(__s8 *)data;
+ return 0;
+ default:
+ pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id);
+ return -EINVAL;
+ }
+}
+
+static int btf_dump_enum_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ const struct btf_enum *e;
+ __s64 value;
+ int i, err;
+
+ err = btf_dump_get_enum_value(d, t, data, id, &value);
+ if (err)
+ return err;
+
+ for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) {
+ if (value != e->val)
+ continue;
+ btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off));
+ return 0;
+ }
+
+ btf_dump_type_values(d, "%d", value);
+ return 0;
+}
+
+static int btf_dump_datasec_data(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data)
+{
+ const struct btf_var_secinfo *vsi;
+ const struct btf_type *var;
+ __u32 i;
+ int err;
+
+ btf_dump_type_values(d, "SEC(\"%s\") ", btf_name_of(d, t->name_off));
+
+ for (i = 0, vsi = btf_var_secinfos(t); i < btf_vlen(t); i++, vsi++) {
+ var = btf__type_by_id(d->btf, vsi->type);
+ err = btf_dump_dump_type_data(d, NULL, var, vsi->type, data + vsi->offset, 0, 0);
+ if (err < 0)
+ return err;
+ btf_dump_printf(d, ";");
+ }
+ return 0;
+}
+
+/* return size of type, or if base type overflows, return -E2BIG. */
+static int btf_dump_type_data_check_overflow(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data,
+ __u8 bits_offset)
+{
+ __s64 size = btf__resolve_size(d->btf, id);
+
+ if (size < 0 || size >= INT_MAX) {
+ pr_warn("unexpected size [%zu] for id [%u]\n",
+ (size_t)size, id);
+ return -EINVAL;
+ }
+
+ /* Only do overflow checking for base types; we do not want to
+ * avoid showing part of a struct, union or array, even if we
+ * do not have enough data to show the full object. By
+ * restricting overflow checking to base types we can ensure
+ * that partial display succeeds, while avoiding overflowing
+ * and using bogus data for display.
+ */
+ t = skip_mods_and_typedefs(d->btf, id, NULL);
+ if (!t) {
+ pr_warn("unexpected error skipping mods/typedefs for id [%u]\n",
+ id);
+ return -EINVAL;
+ }
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_ENUM:
+ if (data + bits_offset / 8 + size > d->typed_dump->data_end)
+ return -E2BIG;
+ break;
+ default:
+ break;
+ }
+ return (int)size;
+}
+
+static int btf_dump_type_data_check_zero(struct btf_dump *d,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz)
+{
+ __s64 value;
+ int i, err;
+
+ /* toplevel exceptions; we show zero values if
+ * - we ask for them (emit_zeros)
+ * - if we are at top-level so we see "struct empty { }"
+ * - or if we are an array member and the array is non-empty and
+ * not a char array; we don't want to be in a situation where we
+ * have an integer array 0, 1, 0, 1 and only show non-zero values.
+ * If the array contains zeroes only, or is a char array starting
+ * with a '\0', the array-level check_zero() will prevent showing it;
+ * we are concerned with determining zero value at the array member
+ * level here.
+ */
+ if (d->typed_dump->emit_zeroes || d->typed_dump->depth == 0 ||
+ (d->typed_dump->is_array_member &&
+ !d->typed_dump->is_array_char))
+ return 0;
+
+ t = skip_mods_and_typedefs(d->btf, id, NULL);
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT:
+ if (bit_sz)
+ return btf_dump_bitfield_check_zero(d, t, data, bits_offset, bit_sz);
+ return btf_dump_base_type_check_zero(d, t, id, data);
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_PTR:
+ return btf_dump_base_type_check_zero(d, t, id, data);
+ case BTF_KIND_ARRAY: {
+ const struct btf_array *array = btf_array(t);
+ const struct btf_type *elem_type;
+ __u32 elem_type_id, elem_size;
+ bool ischar;
+
+ elem_type_id = array->type;
+ elem_size = btf__resolve_size(d->btf, elem_type_id);
+ elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
+
+ ischar = btf_is_int(elem_type) && elem_size == 1;
+
+ /* check all elements; if _any_ element is nonzero, all
+ * of array is displayed. We make an exception however
+ * for char arrays where the first element is 0; these
+ * are considered zeroed also, even if later elements are
+ * non-zero because the string is terminated.
+ */
+ for (i = 0; i < array->nelems; i++) {
+ if (i == 0 && ischar && *(char *)data == 0)
+ return -ENODATA;
+ err = btf_dump_type_data_check_zero(d, elem_type,
+ elem_type_id,
+ data +
+ (i * elem_size),
+ bits_offset, 0);
+ if (err != -ENODATA)
+ return err;
+ }
+ return -ENODATA;
+ }
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m = btf_members(t);
+ __u16 n = btf_vlen(t);
+
+ /* if any struct/union member is non-zero, the struct/union
+ * is considered non-zero and dumped.
+ */
+ for (i = 0; i < n; i++, m++) {
+ const struct btf_type *mtype;
+ __u32 moffset;
+
+ mtype = btf__type_by_id(d->btf, m->type);
+ moffset = btf_member_bit_offset(t, i);
+
+ /* btf_int_bits() does not store member bitfield size;
+ * bitfield size needs to be stored here so int display
+ * of member can retrieve it.
+ */
+ bit_sz = btf_member_bitfield_size(t, i);
+ err = btf_dump_type_data_check_zero(d, mtype, m->type, data + moffset / 8,
+ moffset % 8, bit_sz);
+ if (err != ENODATA)
+ return err;
+ }
+ return -ENODATA;
+ }
+ case BTF_KIND_ENUM:
+ err = btf_dump_get_enum_value(d, t, data, id, &value);
+ if (err)
+ return err;
+ if (value == 0)
+ return -ENODATA;
+ return 0;
+ default:
+ return 0;
+ }
+}
+
+/* returns size of data dumped, or error. */
+static int btf_dump_dump_type_data(struct btf_dump *d,
+ const char *fname,
+ const struct btf_type *t,
+ __u32 id,
+ const void *data,
+ __u8 bits_offset,
+ __u8 bit_sz)
+{
+ int size, err;
+
+ size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset);
+ if (size < 0)
+ return size;
+ err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz);
+ if (err) {
+ /* zeroed data is expected and not an error, so simply skip
+ * dumping such data. Record other errors however.
+ */
+ if (err == -ENODATA)
+ return size;
+ return err;
+ }
+ btf_dump_data_pfx(d);
+
+ if (!d->typed_dump->skip_names) {
+ if (fname && strlen(fname) > 0)
+ btf_dump_printf(d, ".%s = ", fname);
+ btf_dump_emit_type_cast(d, id, true);
+ }
+
+ t = skip_mods_and_typedefs(d->btf, id, NULL);
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_UNKN:
+ case BTF_KIND_FWD:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_FUNC_PROTO:
+ err = btf_dump_unsupported_data(d, t, id);
+ break;
+ case BTF_KIND_INT:
+ if (bit_sz)
+ err = btf_dump_bitfield_data(d, t, data, bits_offset, bit_sz);
+ else
+ err = btf_dump_int_data(d, t, id, data, bits_offset);
+ break;
+ case BTF_KIND_FLOAT:
+ err = btf_dump_float_data(d, t, id, data);
+ break;
+ case BTF_KIND_PTR:
+ err = btf_dump_ptr_data(d, t, id, data);
+ break;
+ case BTF_KIND_ARRAY:
+ err = btf_dump_array_data(d, t, id, data);
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ err = btf_dump_struct_data(d, t, id, data);
+ break;
+ case BTF_KIND_ENUM:
+ /* handle bitfield and int enum values */
+ if (bit_sz) {
+ __u64 print_num;
+ __s64 enum_val;
+
+ err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz,
+ &print_num);
+ if (err)
+ break;
+ enum_val = (__s64)print_num;
+ err = btf_dump_enum_data(d, t, id, &enum_val);
+ } else
+ err = btf_dump_enum_data(d, t, id, data);
+ break;
+ case BTF_KIND_VAR:
+ err = btf_dump_var_data(d, t, id, data);
+ break;
+ case BTF_KIND_DATASEC:
+ err = btf_dump_datasec_data(d, t, id, data);
+ break;
+ default:
+ pr_warn("unexpected kind [%u] for id [%u]\n",
+ BTF_INFO_KIND(t->info), id);
+ return -EINVAL;
+ }
+ if (err < 0)
+ return err;
+ return size;
+}
+
+int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
+ const void *data, size_t data_sz,
+ const struct btf_dump_type_data_opts *opts)
+{
+ struct btf_dump_data typed_dump = {};
+ const struct btf_type *t;
+ int ret;
+
+ if (!OPTS_VALID(opts, btf_dump_type_data_opts))
+ return libbpf_err(-EINVAL);
+
+ t = btf__type_by_id(d->btf, id);
+ if (!t)
+ return libbpf_err(-ENOENT);
+
+ d->typed_dump = &typed_dump;
+ d->typed_dump->data_end = data + data_sz;
+ d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0);
+
+ /* default indent string is a tab */
+ if (!opts->indent_str)
+ d->typed_dump->indent_str[0] = '\t';
+ else
+ strncat(d->typed_dump->indent_str, opts->indent_str,
+ sizeof(d->typed_dump->indent_str) - 1);
+
+ d->typed_dump->compact = OPTS_GET(opts, compact, false);
+ d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);
+ d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false);
+
+ ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0);
+
+ d->typed_dump = NULL;
+
+ return libbpf_err(ret);
+}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6f5e2757bb3c..88d8825fc6f6 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -193,6 +193,8 @@ enum kern_feature_id {
FEAT_MODULE_BTF,
/* BTF_KIND_FLOAT support */
FEAT_BTF_FLOAT,
+ /* BPF perf link support */
+ FEAT_PERF_LINK,
__FEAT_CNT,
};
@@ -498,6 +500,10 @@ struct bpf_object {
* it at load time.
*/
struct btf *btf_vmlinux;
+ /* Path to the custom BTF to be used for BPF CO-RE relocations as an
+ * override for vmlinux BTF.
+ */
+ char *btf_custom_path;
/* vmlinux BTF override for CO-RE relocations */
struct btf *btf_vmlinux_override;
/* Lazily initialized kernel module BTFs */
@@ -591,11 +597,6 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
insn->off == 0;
}
-static bool is_ldimm64_insn(struct bpf_insn *insn)
-{
- return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
-}
-
static bool is_call_insn(const struct bpf_insn *insn)
{
return insn->code == (BPF_JMP | BPF_CALL);
@@ -2645,8 +2646,10 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
struct bpf_program *prog;
int i;
- /* CO-RE relocations need kernel BTF */
- if (obj->btf_ext && obj->btf_ext->core_relo_info.len)
+ /* CO-RE relocations need kernel BTF, only when btf_custom_path
+ * is not specified
+ */
+ if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
return true;
/* Support for typed ksyms needs kernel BTF */
@@ -2679,7 +2682,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
if (!force && !obj_needs_vmlinux_btf(obj))
return 0;
- obj->btf_vmlinux = libbpf_find_kernel_btf();
+ obj->btf_vmlinux = btf__load_vmlinux_btf();
err = libbpf_get_error(obj->btf_vmlinux);
if (err) {
pr_warn("Error loading vmlinux BTF: %d\n", err);
@@ -2768,7 +2771,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
*/
btf__set_fd(kern_btf, 0);
} else {
- err = btf__load(kern_btf);
+ err = btf__load_into_kernel(kern_btf);
}
if (sanitize) {
if (!err) {
@@ -3894,6 +3897,42 @@ static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
return 0;
}
+static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
+{
+ char file[PATH_MAX], buff[4096];
+ FILE *fp;
+ __u32 val;
+ int err;
+
+ snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
+ memset(info, 0, sizeof(*info));
+
+ fp = fopen(file, "r");
+ if (!fp) {
+ err = -errno;
+ pr_warn("failed to open %s: %d. No procfs support?\n", file,
+ err);
+ return err;
+ }
+
+ while (fgets(buff, sizeof(buff), fp)) {
+ if (sscanf(buff, "map_type:\t%u", &val) == 1)
+ info->type = val;
+ else if (sscanf(buff, "key_size:\t%u", &val) == 1)
+ info->key_size = val;
+ else if (sscanf(buff, "value_size:\t%u", &val) == 1)
+ info->value_size = val;
+ else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
+ info->max_entries = val;
+ else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
+ info->map_flags = val;
+ }
+
+ fclose(fp);
+
+ return 0;
+}
+
int bpf_map__reuse_fd(struct bpf_map *map, int fd)
{
struct bpf_map_info info = {};
@@ -3902,6 +3941,8 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
char *new_name;
err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err && errno == EINVAL)
+ err = bpf_get_map_info_from_fdinfo(fd, &info);
if (err)
return libbpf_err(err);
@@ -4298,6 +4339,37 @@ static int probe_module_btf(void)
return !err;
}
+static int probe_perf_link(void)
+{
+ struct bpf_load_program_attr attr;
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, link_fd, err;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_TRACEPOINT;
+ attr.insns = insns;
+ attr.insns_cnt = ARRAY_SIZE(insns);
+ attr.license = "GPL";
+ prog_fd = bpf_load_program_xattr(&attr, NULL, 0);
+ if (prog_fd < 0)
+ return -errno;
+
+ /* use invalid perf_event FD to get EBADF, if link is supported;
+ * otherwise EINVAL should be returned
+ */
+ link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
+ err = -errno; /* close() can clobber errno */
+
+ if (link_fd >= 0)
+ close(link_fd);
+ close(prog_fd);
+
+ return link_fd < 0 && err == -EBADF;
+}
+
enum kern_feature_result {
FEAT_UNKNOWN = 0,
FEAT_SUPPORTED = 1,
@@ -4348,6 +4420,9 @@ static struct kern_feature_desc {
[FEAT_BTF_FLOAT] = {
"BTF_KIND_FLOAT support", probe_kern_btf_float,
},
+ [FEAT_PERF_LINK] = {
+ "BPF perf link support", probe_perf_link,
+ },
};
static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
@@ -4381,12 +4456,16 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
struct bpf_map_info map_info = {};
char msg[STRERR_BUFSIZE];
__u32 map_info_len;
+ int err;
map_info_len = sizeof(map_info);
- if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) {
- pr_warn("failed to get map info for map FD %d: %s\n",
- map_fd, libbpf_strerror_r(errno, msg, sizeof(msg)));
+ err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
+ if (err && errno == EINVAL)
+ err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
+ if (err) {
+ pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
+ libbpf_strerror_r(errno, msg, sizeof(msg)));
return false;
}
@@ -4479,6 +4558,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
{
struct bpf_create_map_attr create_attr;
struct bpf_map_def *def = &map->def;
+ int err = 0;
memset(&create_attr, 0, sizeof(create_attr));
@@ -4521,8 +4601,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
if (bpf_map_type__is_map_in_map(def->type)) {
if (map->inner_map) {
- int err;
-
err = bpf_object__create_map(obj, map->inner_map, true);
if (err) {
pr_warn("map '%s': failed to create inner map: %d\n",
@@ -4547,8 +4625,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
if (map->fd < 0 && (create_attr.btf_key_type_id ||
create_attr.btf_value_type_id)) {
char *cp, errmsg[STRERR_BUFSIZE];
- int err = -errno;
+ err = -errno;
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
map->name, cp, err);
@@ -4560,8 +4638,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
map->fd = bpf_create_map_xattr(&create_attr);
}
- if (map->fd < 0)
- return -errno;
+ err = map->fd < 0 ? -errno : 0;
if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
if (obj->gen_loader)
@@ -4570,7 +4647,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
zfree(&map->inner_map);
}
- return 0;
+ return err;
}
static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
@@ -4616,10 +4693,13 @@ bpf_object__create_maps(struct bpf_object *obj)
char *cp, errmsg[STRERR_BUFSIZE];
unsigned int i, j;
int err;
+ bool retried;
for (i = 0; i < obj->nr_maps; i++) {
map = &obj->maps[i];
+ retried = false;
+retry:
if (map->pin_path) {
err = bpf_object__reuse_map(map);
if (err) {
@@ -4627,6 +4707,12 @@ bpf_object__create_maps(struct bpf_object *obj)
map->name);
goto err_out;
}
+ if (retried && map->fd < 0) {
+ pr_warn("map '%s': cannot find pinned map\n",
+ map->name);
+ err = -ENOENT;
+ goto err_out;
+ }
}
if (map->fd >= 0) {
@@ -4660,9 +4746,13 @@ bpf_object__create_maps(struct bpf_object *obj)
if (map->pin_path && !map->pinned) {
err = bpf_map__pin(map, NULL);
if (err) {
+ zclose(map->fd);
+ if (!retried && err == -EEXIST) {
+ retried = true;
+ goto retry;
+ }
pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
map->name, map->pin_path, err);
- zclose(map->fd);
goto err_out;
}
}
@@ -4679,279 +4769,6 @@ err_out:
return err;
}
-#define BPF_CORE_SPEC_MAX_LEN 64
-
-/* represents BPF CO-RE field or array element accessor */
-struct bpf_core_accessor {
- __u32 type_id; /* struct/union type or array element type */
- __u32 idx; /* field index or array index */
- const char *name; /* field name or NULL for array accessor */
-};
-
-struct bpf_core_spec {
- const struct btf *btf;
- /* high-level spec: named fields and array indices only */
- struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
- /* original unresolved (no skip_mods_or_typedefs) root type ID */
- __u32 root_type_id;
- /* CO-RE relocation kind */
- enum bpf_core_relo_kind relo_kind;
- /* high-level spec length */
- int len;
- /* raw, low-level spec: 1-to-1 with accessor spec string */
- int raw_spec[BPF_CORE_SPEC_MAX_LEN];
- /* raw spec length */
- int raw_len;
- /* field bit offset represented by spec */
- __u32 bit_offset;
-};
-
-static bool str_is_empty(const char *s)
-{
- return !s || !s[0];
-}
-
-static bool is_flex_arr(const struct btf *btf,
- const struct bpf_core_accessor *acc,
- const struct btf_array *arr)
-{
- const struct btf_type *t;
-
- /* not a flexible array, if not inside a struct or has non-zero size */
- if (!acc->name || arr->nelems > 0)
- return false;
-
- /* has to be the last member of enclosing struct */
- t = btf__type_by_id(btf, acc->type_id);
- return acc->idx == btf_vlen(t) - 1;
-}
-
-static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_FIELD_BYTE_OFFSET: return "byte_off";
- case BPF_FIELD_BYTE_SIZE: return "byte_sz";
- case BPF_FIELD_EXISTS: return "field_exists";
- case BPF_FIELD_SIGNED: return "signed";
- case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
- case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
- case BPF_TYPE_ID_LOCAL: return "local_type_id";
- case BPF_TYPE_ID_TARGET: return "target_type_id";
- case BPF_TYPE_EXISTS: return "type_exists";
- case BPF_TYPE_SIZE: return "type_size";
- case BPF_ENUMVAL_EXISTS: return "enumval_exists";
- case BPF_ENUMVAL_VALUE: return "enumval_value";
- default: return "unknown";
- }
-}
-
-static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_FIELD_BYTE_OFFSET:
- case BPF_FIELD_BYTE_SIZE:
- case BPF_FIELD_EXISTS:
- case BPF_FIELD_SIGNED:
- case BPF_FIELD_LSHIFT_U64:
- case BPF_FIELD_RSHIFT_U64:
- return true;
- default:
- return false;
- }
-}
-
-static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_TYPE_ID_LOCAL:
- case BPF_TYPE_ID_TARGET:
- case BPF_TYPE_EXISTS:
- case BPF_TYPE_SIZE:
- return true;
- default:
- return false;
- }
-}
-
-static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
-{
- switch (kind) {
- case BPF_ENUMVAL_EXISTS:
- case BPF_ENUMVAL_VALUE:
- return true;
- default:
- return false;
- }
-}
-
-/*
- * Turn bpf_core_relo into a low- and high-level spec representation,
- * validating correctness along the way, as well as calculating resulting
- * field bit offset, specified by accessor string. Low-level spec captures
- * every single level of nestedness, including traversing anonymous
- * struct/union members. High-level one only captures semantically meaningful
- * "turning points": named fields and array indicies.
- * E.g., for this case:
- *
- * struct sample {
- * int __unimportant;
- * struct {
- * int __1;
- * int __2;
- * int a[7];
- * };
- * };
- *
- * struct sample *s = ...;
- *
- * int x = &s->a[3]; // access string = '0:1:2:3'
- *
- * Low-level spec has 1:1 mapping with each element of access string (it's
- * just a parsed access string representation): [0, 1, 2, 3].
- *
- * High-level spec will capture only 3 points:
- * - intial zero-index access by pointer (&s->... is the same as &s[0]...);
- * - field 'a' access (corresponds to '2' in low-level spec);
- * - array element #3 access (corresponds to '3' in low-level spec).
- *
- * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
- * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
- * spec and raw_spec are kept empty.
- *
- * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
- * string to specify enumerator's value index that need to be relocated.
- */
-static int bpf_core_parse_spec(const struct btf *btf,
- __u32 type_id,
- const char *spec_str,
- enum bpf_core_relo_kind relo_kind,
- struct bpf_core_spec *spec)
-{
- int access_idx, parsed_len, i;
- struct bpf_core_accessor *acc;
- const struct btf_type *t;
- const char *name;
- __u32 id;
- __s64 sz;
-
- if (str_is_empty(spec_str) || *spec_str == ':')
- return -EINVAL;
-
- memset(spec, 0, sizeof(*spec));
- spec->btf = btf;
- spec->root_type_id = type_id;
- spec->relo_kind = relo_kind;
-
- /* type-based relocations don't have a field access string */
- if (core_relo_is_type_based(relo_kind)) {
- if (strcmp(spec_str, "0"))
- return -EINVAL;
- return 0;
- }
-
- /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
- while (*spec_str) {
- if (*spec_str == ':')
- ++spec_str;
- if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
- return -EINVAL;
- if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
- return -E2BIG;
- spec_str += parsed_len;
- spec->raw_spec[spec->raw_len++] = access_idx;
- }
-
- if (spec->raw_len == 0)
- return -EINVAL;
-
- t = skip_mods_and_typedefs(btf, type_id, &id);
- if (!t)
- return -EINVAL;
-
- access_idx = spec->raw_spec[0];
- acc = &spec->spec[0];
- acc->type_id = id;
- acc->idx = access_idx;
- spec->len++;
-
- if (core_relo_is_enumval_based(relo_kind)) {
- if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
- return -EINVAL;
-
- /* record enumerator name in a first accessor */
- acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
- return 0;
- }
-
- if (!core_relo_is_field_based(relo_kind))
- return -EINVAL;
-
- sz = btf__resolve_size(btf, id);
- if (sz < 0)
- return sz;
- spec->bit_offset = access_idx * sz * 8;
-
- for (i = 1; i < spec->raw_len; i++) {
- t = skip_mods_and_typedefs(btf, id, &id);
- if (!t)
- return -EINVAL;
-
- access_idx = spec->raw_spec[i];
- acc = &spec->spec[spec->len];
-
- if (btf_is_composite(t)) {
- const struct btf_member *m;
- __u32 bit_offset;
-
- if (access_idx >= btf_vlen(t))
- return -EINVAL;
-
- bit_offset = btf_member_bit_offset(t, access_idx);
- spec->bit_offset += bit_offset;
-
- m = btf_members(t) + access_idx;
- if (m->name_off) {
- name = btf__name_by_offset(btf, m->name_off);
- if (str_is_empty(name))
- return -EINVAL;
-
- acc->type_id = id;
- acc->idx = access_idx;
- acc->name = name;
- spec->len++;
- }
-
- id = m->type;
- } else if (btf_is_array(t)) {
- const struct btf_array *a = btf_array(t);
- bool flex;
-
- t = skip_mods_and_typedefs(btf, a->type, &id);
- if (!t)
- return -EINVAL;
-
- flex = is_flex_arr(btf, acc - 1, a);
- if (!flex && access_idx >= a->nelems)
- return -EINVAL;
-
- spec->spec[spec->len].type_id = id;
- spec->spec[spec->len].idx = access_idx;
- spec->len++;
-
- sz = btf__resolve_size(btf, id);
- if (sz < 0)
- return sz;
- spec->bit_offset += access_idx * sz * 8;
- } else {
- pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
- type_id, spec_str, i, id, btf_kind_str(t));
- return -EINVAL;
- }
- }
-
- return 0;
-}
-
static bool bpf_core_is_flavor_sep(const char *s)
{
/* check X___Y name pattern, where X and Y are not underscores */
@@ -4964,7 +4781,7 @@ static bool bpf_core_is_flavor_sep(const char *s)
* before last triple underscore. Struct name part after last triple
* underscore is ignored by BPF CO-RE relocation during relocation matching.
*/
-static size_t bpf_core_essential_name_len(const char *name)
+size_t bpf_core_essential_name_len(const char *name)
{
size_t n = strlen(name);
int i;
@@ -4976,34 +4793,20 @@ static size_t bpf_core_essential_name_len(const char *name)
return n;
}
-struct core_cand
-{
- const struct btf *btf;
- const struct btf_type *t;
- const char *name;
- __u32 id;
-};
-
-/* dynamically sized list of type IDs and its associated struct btf */
-struct core_cand_list {
- struct core_cand *cands;
- int len;
-};
-
-static void bpf_core_free_cands(struct core_cand_list *cands)
+static void bpf_core_free_cands(struct bpf_core_cand_list *cands)
{
free(cands->cands);
free(cands);
}
-static int bpf_core_add_cands(struct core_cand *local_cand,
+static int bpf_core_add_cands(struct bpf_core_cand *local_cand,
size_t local_essent_len,
const struct btf *targ_btf,
const char *targ_btf_name,
int targ_start_id,
- struct core_cand_list *cands)
+ struct bpf_core_cand_list *cands)
{
- struct core_cand *new_cands, *cand;
+ struct bpf_core_cand *new_cands, *cand;
const struct btf_type *t;
const char *targ_name;
size_t targ_essent_len;
@@ -5139,11 +4942,11 @@ err_out:
return 0;
}
-static struct core_cand_list *
+static struct bpf_core_cand_list *
bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
{
- struct core_cand local_cand = {};
- struct core_cand_list *cands;
+ struct bpf_core_cand local_cand = {};
+ struct bpf_core_cand_list *cands;
const struct btf *main_btf;
size_t local_essent_len;
int err, i;
@@ -5197,165 +5000,6 @@ err_out:
return ERR_PTR(err);
}
-/* Check two types for compatibility for the purpose of field access
- * relocation. const/volatile/restrict and typedefs are skipped to ensure we
- * are relocating semantically compatible entities:
- * - any two STRUCTs/UNIONs are compatible and can be mixed;
- * - any two FWDs are compatible, if their names match (modulo flavor suffix);
- * - any two PTRs are always compatible;
- * - for ENUMs, names should be the same (ignoring flavor suffix) or at
- * least one of enums should be anonymous;
- * - for ENUMs, check sizes, names are ignored;
- * - for INT, size and signedness are ignored;
- * - any two FLOATs are always compatible;
- * - for ARRAY, dimensionality is ignored, element types are checked for
- * compatibility recursively;
- * - everything else shouldn't be ever a target of relocation.
- * These rules are not set in stone and probably will be adjusted as we get
- * more experience with using BPF CO-RE relocations.
- */
-static int bpf_core_fields_are_compat(const struct btf *local_btf,
- __u32 local_id,
- const struct btf *targ_btf,
- __u32 targ_id)
-{
- const struct btf_type *local_type, *targ_type;
-
-recur:
- local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
- targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
- if (!local_type || !targ_type)
- return -EINVAL;
-
- if (btf_is_composite(local_type) && btf_is_composite(targ_type))
- return 1;
- if (btf_kind(local_type) != btf_kind(targ_type))
- return 0;
-
- switch (btf_kind(local_type)) {
- case BTF_KIND_PTR:
- case BTF_KIND_FLOAT:
- return 1;
- case BTF_KIND_FWD:
- case BTF_KIND_ENUM: {
- const char *local_name, *targ_name;
- size_t local_len, targ_len;
-
- local_name = btf__name_by_offset(local_btf,
- local_type->name_off);
- targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
- local_len = bpf_core_essential_name_len(local_name);
- targ_len = bpf_core_essential_name_len(targ_name);
- /* one of them is anonymous or both w/ same flavor-less names */
- return local_len == 0 || targ_len == 0 ||
- (local_len == targ_len &&
- strncmp(local_name, targ_name, local_len) == 0);
- }
- case BTF_KIND_INT:
- /* just reject deprecated bitfield-like integers; all other
- * integers are by default compatible between each other
- */
- return btf_int_offset(local_type) == 0 &&
- btf_int_offset(targ_type) == 0;
- case BTF_KIND_ARRAY:
- local_id = btf_array(local_type)->type;
- targ_id = btf_array(targ_type)->type;
- goto recur;
- default:
- pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
- btf_kind(local_type), local_id, targ_id);
- return 0;
- }
-}
-
-/*
- * Given single high-level named field accessor in local type, find
- * corresponding high-level accessor for a target type. Along the way,
- * maintain low-level spec for target as well. Also keep updating target
- * bit offset.
- *
- * Searching is performed through recursive exhaustive enumeration of all
- * fields of a struct/union. If there are any anonymous (embedded)
- * structs/unions, they are recursively searched as well. If field with
- * desired name is found, check compatibility between local and target types,
- * before returning result.
- *
- * 1 is returned, if field is found.
- * 0 is returned if no compatible field is found.
- * <0 is returned on error.
- */
-static int bpf_core_match_member(const struct btf *local_btf,
- const struct bpf_core_accessor *local_acc,
- const struct btf *targ_btf,
- __u32 targ_id,
- struct bpf_core_spec *spec,
- __u32 *next_targ_id)
-{
- const struct btf_type *local_type, *targ_type;
- const struct btf_member *local_member, *m;
- const char *local_name, *targ_name;
- __u32 local_id;
- int i, n, found;
-
- targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
- if (!targ_type)
- return -EINVAL;
- if (!btf_is_composite(targ_type))
- return 0;
-
- local_id = local_acc->type_id;
- local_type = btf__type_by_id(local_btf, local_id);
- local_member = btf_members(local_type) + local_acc->idx;
- local_name = btf__name_by_offset(local_btf, local_member->name_off);
-
- n = btf_vlen(targ_type);
- m = btf_members(targ_type);
- for (i = 0; i < n; i++, m++) {
- __u32 bit_offset;
-
- bit_offset = btf_member_bit_offset(targ_type, i);
-
- /* too deep struct/union/array nesting */
- if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
- return -E2BIG;
-
- /* speculate this member will be the good one */
- spec->bit_offset += bit_offset;
- spec->raw_spec[spec->raw_len++] = i;
-
- targ_name = btf__name_by_offset(targ_btf, m->name_off);
- if (str_is_empty(targ_name)) {
- /* embedded struct/union, we need to go deeper */
- found = bpf_core_match_member(local_btf, local_acc,
- targ_btf, m->type,
- spec, next_targ_id);
- if (found) /* either found or error */
- return found;
- } else if (strcmp(local_name, targ_name) == 0) {
- /* matching named field */
- struct bpf_core_accessor *targ_acc;
-
- targ_acc = &spec->spec[spec->len++];
- targ_acc->type_id = targ_id;
- targ_acc->idx = i;
- targ_acc->name = targ_name;
-
- *next_targ_id = m->type;
- found = bpf_core_fields_are_compat(local_btf,
- local_member->type,
- targ_btf, m->type);
- if (!found)
- spec->len--; /* pop accessor */
- return found;
- }
- /* member turned out not to be what we looked for */
- spec->bit_offset -= bit_offset;
- spec->raw_len--;
- }
-
- return 0;
-}
-
/* Check local and target types for compatibility. This check is used for
* type-based CO-RE relocations and follow slightly different rules than
* field-based relocations. This function assumes that root types were already
@@ -5375,8 +5019,8 @@ static int bpf_core_match_member(const struct btf *local_btf,
* These rules are not set in stone and probably will be adjusted as we get
* more experience with using BPF CO-RE relocations.
*/
-static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
- const struct btf *targ_btf, __u32 targ_id)
+int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id)
{
const struct btf_type *local_type, *targ_type;
int depth = 32; /* max recursion depth */
@@ -5450,671 +5094,6 @@ recur:
}
}
-/*
- * Try to match local spec to a target type and, if successful, produce full
- * target spec (high-level, low-level + bit offset).
- */
-static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
- const struct btf *targ_btf, __u32 targ_id,
- struct bpf_core_spec *targ_spec)
-{
- const struct btf_type *targ_type;
- const struct bpf_core_accessor *local_acc;
- struct bpf_core_accessor *targ_acc;
- int i, sz, matched;
-
- memset(targ_spec, 0, sizeof(*targ_spec));
- targ_spec->btf = targ_btf;
- targ_spec->root_type_id = targ_id;
- targ_spec->relo_kind = local_spec->relo_kind;
-
- if (core_relo_is_type_based(local_spec->relo_kind)) {
- return bpf_core_types_are_compat(local_spec->btf,
- local_spec->root_type_id,
- targ_btf, targ_id);
- }
-
- local_acc = &local_spec->spec[0];
- targ_acc = &targ_spec->spec[0];
-
- if (core_relo_is_enumval_based(local_spec->relo_kind)) {
- size_t local_essent_len, targ_essent_len;
- const struct btf_enum *e;
- const char *targ_name;
-
- /* has to resolve to an enum */
- targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
- if (!btf_is_enum(targ_type))
- return 0;
-
- local_essent_len = bpf_core_essential_name_len(local_acc->name);
-
- for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
- targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
- targ_essent_len = bpf_core_essential_name_len(targ_name);
- if (targ_essent_len != local_essent_len)
- continue;
- if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
- targ_acc->type_id = targ_id;
- targ_acc->idx = i;
- targ_acc->name = targ_name;
- targ_spec->len++;
- targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
- targ_spec->raw_len++;
- return 1;
- }
- }
- return 0;
- }
-
- if (!core_relo_is_field_based(local_spec->relo_kind))
- return -EINVAL;
-
- for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
- targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
- &targ_id);
- if (!targ_type)
- return -EINVAL;
-
- if (local_acc->name) {
- matched = bpf_core_match_member(local_spec->btf,
- local_acc,
- targ_btf, targ_id,
- targ_spec, &targ_id);
- if (matched <= 0)
- return matched;
- } else {
- /* for i=0, targ_id is already treated as array element
- * type (because it's the original struct), for others
- * we should find array element type first
- */
- if (i > 0) {
- const struct btf_array *a;
- bool flex;
-
- if (!btf_is_array(targ_type))
- return 0;
-
- a = btf_array(targ_type);
- flex = is_flex_arr(targ_btf, targ_acc - 1, a);
- if (!flex && local_acc->idx >= a->nelems)
- return 0;
- if (!skip_mods_and_typedefs(targ_btf, a->type,
- &targ_id))
- return -EINVAL;
- }
-
- /* too deep struct/union/array nesting */
- if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
- return -E2BIG;
-
- targ_acc->type_id = targ_id;
- targ_acc->idx = local_acc->idx;
- targ_acc->name = NULL;
- targ_spec->len++;
- targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
- targ_spec->raw_len++;
-
- sz = btf__resolve_size(targ_btf, targ_id);
- if (sz < 0)
- return sz;
- targ_spec->bit_offset += local_acc->idx * sz * 8;
- }
- }
-
- return 1;
-}
-
-static int bpf_core_calc_field_relo(const struct bpf_program *prog,
- const struct bpf_core_relo *relo,
- const struct bpf_core_spec *spec,
- __u32 *val, __u32 *field_sz, __u32 *type_id,
- bool *validate)
-{
- const struct bpf_core_accessor *acc;
- const struct btf_type *t;
- __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
- const struct btf_member *m;
- const struct btf_type *mt;
- bool bitfield;
- __s64 sz;
-
- *field_sz = 0;
-
- if (relo->kind == BPF_FIELD_EXISTS) {
- *val = spec ? 1 : 0;
- return 0;
- }
-
- if (!spec)
- return -EUCLEAN; /* request instruction poisoning */
-
- acc = &spec->spec[spec->len - 1];
- t = btf__type_by_id(spec->btf, acc->type_id);
-
- /* a[n] accessor needs special handling */
- if (!acc->name) {
- if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
- *val = spec->bit_offset / 8;
- /* remember field size for load/store mem size */
- sz = btf__resolve_size(spec->btf, acc->type_id);
- if (sz < 0)
- return -EINVAL;
- *field_sz = sz;
- *type_id = acc->type_id;
- } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
- sz = btf__resolve_size(spec->btf, acc->type_id);
- if (sz < 0)
- return -EINVAL;
- *val = sz;
- } else {
- pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
- prog->name, relo->kind, relo->insn_off / 8);
- return -EINVAL;
- }
- if (validate)
- *validate = true;
- return 0;
- }
-
- m = btf_members(t) + acc->idx;
- mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
- bit_off = spec->bit_offset;
- bit_sz = btf_member_bitfield_size(t, acc->idx);
-
- bitfield = bit_sz > 0;
- if (bitfield) {
- byte_sz = mt->size;
- byte_off = bit_off / 8 / byte_sz * byte_sz;
- /* figure out smallest int size necessary for bitfield load */
- while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
- if (byte_sz >= 8) {
- /* bitfield can't be read with 64-bit read */
- pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
- prog->name, relo->kind, relo->insn_off / 8);
- return -E2BIG;
- }
- byte_sz *= 2;
- byte_off = bit_off / 8 / byte_sz * byte_sz;
- }
- } else {
- sz = btf__resolve_size(spec->btf, field_type_id);
- if (sz < 0)
- return -EINVAL;
- byte_sz = sz;
- byte_off = spec->bit_offset / 8;
- bit_sz = byte_sz * 8;
- }
-
- /* for bitfields, all the relocatable aspects are ambiguous and we
- * might disagree with compiler, so turn off validation of expected
- * value, except for signedness
- */
- if (validate)
- *validate = !bitfield;
-
- switch (relo->kind) {
- case BPF_FIELD_BYTE_OFFSET:
- *val = byte_off;
- if (!bitfield) {
- *field_sz = byte_sz;
- *type_id = field_type_id;
- }
- break;
- case BPF_FIELD_BYTE_SIZE:
- *val = byte_sz;
- break;
- case BPF_FIELD_SIGNED:
- /* enums will be assumed unsigned */
- *val = btf_is_enum(mt) ||
- (btf_int_encoding(mt) & BTF_INT_SIGNED);
- if (validate)
- *validate = true; /* signedness is never ambiguous */
- break;
- case BPF_FIELD_LSHIFT_U64:
-#if __BYTE_ORDER == __LITTLE_ENDIAN
- *val = 64 - (bit_off + bit_sz - byte_off * 8);
-#else
- *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
-#endif
- break;
- case BPF_FIELD_RSHIFT_U64:
- *val = 64 - bit_sz;
- if (validate)
- *validate = true; /* right shift is never ambiguous */
- break;
- case BPF_FIELD_EXISTS:
- default:
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
- const struct bpf_core_spec *spec,
- __u32 *val)
-{
- __s64 sz;
-
- /* type-based relos return zero when target type is not found */
- if (!spec) {
- *val = 0;
- return 0;
- }
-
- switch (relo->kind) {
- case BPF_TYPE_ID_TARGET:
- *val = spec->root_type_id;
- break;
- case BPF_TYPE_EXISTS:
- *val = 1;
- break;
- case BPF_TYPE_SIZE:
- sz = btf__resolve_size(spec->btf, spec->root_type_id);
- if (sz < 0)
- return -EINVAL;
- *val = sz;
- break;
- case BPF_TYPE_ID_LOCAL:
- /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
- default:
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
- const struct bpf_core_spec *spec,
- __u32 *val)
-{
- const struct btf_type *t;
- const struct btf_enum *e;
-
- switch (relo->kind) {
- case BPF_ENUMVAL_EXISTS:
- *val = spec ? 1 : 0;
- break;
- case BPF_ENUMVAL_VALUE:
- if (!spec)
- return -EUCLEAN; /* request instruction poisoning */
- t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
- e = btf_enum(t) + spec->spec[0].idx;
- *val = e->val;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-struct bpf_core_relo_res
-{
- /* expected value in the instruction, unless validate == false */
- __u32 orig_val;
- /* new value that needs to be patched up to */
- __u32 new_val;
- /* relocation unsuccessful, poison instruction, but don't fail load */
- bool poison;
- /* some relocations can't be validated against orig_val */
- bool validate;
- /* for field byte offset relocations or the forms:
- * *(T *)(rX + <off>) = rY
- * rX = *(T *)(rY + <off>),
- * we remember original and resolved field size to adjust direct
- * memory loads of pointers and integers; this is necessary for 32-bit
- * host kernel architectures, but also allows to automatically
- * relocate fields that were resized from, e.g., u32 to u64, etc.
- */
- bool fail_memsz_adjust;
- __u32 orig_sz;
- __u32 orig_type_id;
- __u32 new_sz;
- __u32 new_type_id;
-};
-
-/* Calculate original and target relocation values, given local and target
- * specs and relocation kind. These values are calculated for each candidate.
- * If there are multiple candidates, resulting values should all be consistent
- * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
- * If instruction has to be poisoned, *poison will be set to true.
- */
-static int bpf_core_calc_relo(const struct bpf_program *prog,
- const struct bpf_core_relo *relo,
- int relo_idx,
- const struct bpf_core_spec *local_spec,
- const struct bpf_core_spec *targ_spec,
- struct bpf_core_relo_res *res)
-{
- int err = -EOPNOTSUPP;
-
- res->orig_val = 0;
- res->new_val = 0;
- res->poison = false;
- res->validate = true;
- res->fail_memsz_adjust = false;
- res->orig_sz = res->new_sz = 0;
- res->orig_type_id = res->new_type_id = 0;
-
- if (core_relo_is_field_based(relo->kind)) {
- err = bpf_core_calc_field_relo(prog, relo, local_spec,
- &res->orig_val, &res->orig_sz,
- &res->orig_type_id, &res->validate);
- err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec,
- &res->new_val, &res->new_sz,
- &res->new_type_id, NULL);
- if (err)
- goto done;
- /* Validate if it's safe to adjust load/store memory size.
- * Adjustments are performed only if original and new memory
- * sizes differ.
- */
- res->fail_memsz_adjust = false;
- if (res->orig_sz != res->new_sz) {
- const struct btf_type *orig_t, *new_t;
-
- orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
- new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
-
- /* There are two use cases in which it's safe to
- * adjust load/store's mem size:
- * - reading a 32-bit kernel pointer, while on BPF
- * size pointers are always 64-bit; in this case
- * it's safe to "downsize" instruction size due to
- * pointer being treated as unsigned integer with
- * zero-extended upper 32-bits;
- * - reading unsigned integers, again due to
- * zero-extension is preserving the value correctly.
- *
- * In all other cases it's incorrect to attempt to
- * load/store field because read value will be
- * incorrect, so we poison relocated instruction.
- */
- if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
- goto done;
- if (btf_is_int(orig_t) && btf_is_int(new_t) &&
- btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
- btf_int_encoding(new_t) != BTF_INT_SIGNED)
- goto done;
-
- /* mark as invalid mem size adjustment, but this will
- * only be checked for LDX/STX/ST insns
- */
- res->fail_memsz_adjust = true;
- }
- } else if (core_relo_is_type_based(relo->kind)) {
- err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
- err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
- } else if (core_relo_is_enumval_based(relo->kind)) {
- err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
- err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
- }
-
-done:
- if (err == -EUCLEAN) {
- /* EUCLEAN is used to signal instruction poisoning request */
- res->poison = true;
- err = 0;
- } else if (err == -EOPNOTSUPP) {
- /* EOPNOTSUPP means unknown/unsupported relocation */
- pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
- prog->name, relo_idx, core_relo_kind_str(relo->kind),
- relo->kind, relo->insn_off / 8);
- }
-
- return err;
-}
-
-/*
- * Turn instruction for which CO_RE relocation failed into invalid one with
- * distinct signature.
- */
-static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
- int insn_idx, struct bpf_insn *insn)
-{
- pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
- prog->name, relo_idx, insn_idx);
- insn->code = BPF_JMP | BPF_CALL;
- insn->dst_reg = 0;
- insn->src_reg = 0;
- insn->off = 0;
- /* if this instruction is reachable (not a dead code),
- * verifier will complain with the following message:
- * invalid func unknown#195896080
- */
- insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
-}
-
-static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
-{
- switch (BPF_SIZE(insn->code)) {
- case BPF_DW: return 8;
- case BPF_W: return 4;
- case BPF_H: return 2;
- case BPF_B: return 1;
- default: return -1;
- }
-}
-
-static int insn_bytes_to_bpf_size(__u32 sz)
-{
- switch (sz) {
- case 8: return BPF_DW;
- case 4: return BPF_W;
- case 2: return BPF_H;
- case 1: return BPF_B;
- default: return -1;
- }
-}
-
-/*
- * Patch relocatable BPF instruction.
- *
- * Patched value is determined by relocation kind and target specification.
- * For existence relocations target spec will be NULL if field/type is not found.
- * Expected insn->imm value is determined using relocation kind and local
- * spec, and is checked before patching instruction. If actual insn->imm value
- * is wrong, bail out with error.
- *
- * Currently supported classes of BPF instruction are:
- * 1. rX = <imm> (assignment with immediate operand);
- * 2. rX += <imm> (arithmetic operations with immediate operand);
- * 3. rX = <imm64> (load with 64-bit immediate value);
- * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
- * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
- * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
- */
-static int bpf_core_patch_insn(struct bpf_program *prog,
- const struct bpf_core_relo *relo,
- int relo_idx,
- const struct bpf_core_relo_res *res)
-{
- __u32 orig_val, new_val;
- struct bpf_insn *insn;
- int insn_idx;
- __u8 class;
-
- if (relo->insn_off % BPF_INSN_SZ)
- return -EINVAL;
- insn_idx = relo->insn_off / BPF_INSN_SZ;
- /* adjust insn_idx from section frame of reference to the local
- * program's frame of reference; (sub-)program code is not yet
- * relocated, so it's enough to just subtract in-section offset
- */
- insn_idx = insn_idx - prog->sec_insn_off;
- insn = &prog->insns[insn_idx];
- class = BPF_CLASS(insn->code);
-
- if (res->poison) {
-poison:
- /* poison second part of ldimm64 to avoid confusing error from
- * verifier about "unknown opcode 00"
- */
- if (is_ldimm64_insn(insn))
- bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
- bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
- return 0;
- }
-
- orig_val = res->orig_val;
- new_val = res->new_val;
-
- switch (class) {
- case BPF_ALU:
- case BPF_ALU64:
- if (BPF_SRC(insn->code) != BPF_K)
- return -EINVAL;
- if (res->validate && insn->imm != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
- prog->name, relo_idx,
- insn_idx, insn->imm, orig_val, new_val);
- return -EINVAL;
- }
- orig_val = insn->imm;
- insn->imm = new_val;
- pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
- prog->name, relo_idx, insn_idx,
- orig_val, new_val);
- break;
- case BPF_LDX:
- case BPF_ST:
- case BPF_STX:
- if (res->validate && insn->off != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
- prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val);
- return -EINVAL;
- }
- if (new_val > SHRT_MAX) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
- prog->name, relo_idx, insn_idx, new_val);
- return -ERANGE;
- }
- if (res->fail_memsz_adjust) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
- "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
- prog->name, relo_idx, insn_idx);
- goto poison;
- }
-
- orig_val = insn->off;
- insn->off = new_val;
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
- prog->name, relo_idx, insn_idx, orig_val, new_val);
-
- if (res->new_sz != res->orig_sz) {
- int insn_bytes_sz, insn_bpf_sz;
-
- insn_bytes_sz = insn_bpf_size_to_bytes(insn);
- if (insn_bytes_sz != res->orig_sz) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
- prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
- return -EINVAL;
- }
-
- insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
- if (insn_bpf_sz < 0) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
- prog->name, relo_idx, insn_idx, res->new_sz);
- return -EINVAL;
- }
-
- insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
- prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
- }
- break;
- case BPF_LD: {
- __u64 imm;
-
- if (!is_ldimm64_insn(insn) ||
- insn[0].src_reg != 0 || insn[0].off != 0 ||
- insn_idx + 1 >= prog->insns_cnt ||
- insn[1].code != 0 || insn[1].dst_reg != 0 ||
- insn[1].src_reg != 0 || insn[1].off != 0) {
- pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
- prog->name, relo_idx, insn_idx);
- return -EINVAL;
- }
-
- imm = insn[0].imm + ((__u64)insn[1].imm << 32);
- if (res->validate && imm != orig_val) {
- pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
- prog->name, relo_idx,
- insn_idx, (unsigned long long)imm,
- orig_val, new_val);
- return -EINVAL;
- }
-
- insn[0].imm = new_val;
- insn[1].imm = 0; /* currently only 32-bit values are supported */
- pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
- prog->name, relo_idx, insn_idx,
- (unsigned long long)imm, new_val);
- break;
- }
- default:
- pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
- prog->name, relo_idx, insn_idx, insn->code,
- insn->src_reg, insn->dst_reg, insn->off, insn->imm);
- return -EINVAL;
- }
-
- return 0;
-}
-
-/* Output spec definition in the format:
- * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
- * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
- */
-static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
-{
- const struct btf_type *t;
- const struct btf_enum *e;
- const char *s;
- __u32 type_id;
- int i;
-
- type_id = spec->root_type_id;
- t = btf__type_by_id(spec->btf, type_id);
- s = btf__name_by_offset(spec->btf, t->name_off);
-
- libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
-
- if (core_relo_is_type_based(spec->relo_kind))
- return;
-
- if (core_relo_is_enumval_based(spec->relo_kind)) {
- t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
- e = btf_enum(t) + spec->raw_spec[0];
- s = btf__name_by_offset(spec->btf, e->name_off);
-
- libbpf_print(level, "::%s = %u", s, e->val);
- return;
- }
-
- if (core_relo_is_field_based(spec->relo_kind)) {
- for (i = 0; i < spec->len; i++) {
- if (spec->spec[i].name)
- libbpf_print(level, ".%s", spec->spec[i].name);
- else if (i > 0 || spec->spec[i].idx > 0)
- libbpf_print(level, "[%u]", spec->spec[i].idx);
- }
-
- libbpf_print(level, " (");
- for (i = 0; i < spec->raw_len; i++)
- libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
-
- if (spec->bit_offset % 8)
- libbpf_print(level, " @ offset %u.%u)",
- spec->bit_offset / 8, spec->bit_offset % 8);
- else
- libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
- return;
- }
-}
-
static size_t bpf_core_hash_fn(const void *key, void *ctx)
{
return (size_t)key;
@@ -6130,73 +5109,33 @@ static void *u32_as_hash_key(__u32 x)
return (void *)(uintptr_t)x;
}
-/*
- * CO-RE relocate single instruction.
- *
- * The outline and important points of the algorithm:
- * 1. For given local type, find corresponding candidate target types.
- * Candidate type is a type with the same "essential" name, ignoring
- * everything after last triple underscore (___). E.g., `sample`,
- * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
- * for each other. Names with triple underscore are referred to as
- * "flavors" and are useful, among other things, to allow to
- * specify/support incompatible variations of the same kernel struct, which
- * might differ between different kernel versions and/or build
- * configurations.
- *
- * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
- * converter, when deduplicated BTF of a kernel still contains more than
- * one different types with the same name. In that case, ___2, ___3, etc
- * are appended starting from second name conflict. But start flavors are
- * also useful to be defined "locally", in BPF program, to extract same
- * data from incompatible changes between different kernel
- * versions/configurations. For instance, to handle field renames between
- * kernel versions, one can use two flavors of the struct name with the
- * same common name and use conditional relocations to extract that field,
- * depending on target kernel version.
- * 2. For each candidate type, try to match local specification to this
- * candidate target type. Matching involves finding corresponding
- * high-level spec accessors, meaning that all named fields should match,
- * as well as all array accesses should be within the actual bounds. Also,
- * types should be compatible (see bpf_core_fields_are_compat for details).
- * 3. It is supported and expected that there might be multiple flavors
- * matching the spec. As long as all the specs resolve to the same set of
- * offsets across all candidates, there is no error. If there is any
- * ambiguity, CO-RE relocation will fail. This is necessary to accomodate
- * imprefection of BTF deduplication, which can cause slight duplication of
- * the same BTF type, if some directly or indirectly referenced (by
- * pointer) type gets resolved to different actual types in different
- * object files. If such situation occurs, deduplicated BTF will end up
- * with two (or more) structurally identical types, which differ only in
- * types they refer to through pointer. This should be OK in most cases and
- * is not an error.
- * 4. Candidate types search is performed by linearly scanning through all
- * types in target BTF. It is anticipated that this is overall more
- * efficient memory-wise and not significantly worse (if not better)
- * CPU-wise compared to prebuilding a map from all local type names to
- * a list of candidate type names. It's also sped up by caching resolved
- * list of matching candidates per each local "root" type ID, that has at
- * least one bpf_core_relo associated with it. This list is shared
- * between multiple relocations for the same type ID and is updated as some
- * of the candidates are pruned due to structural incompatibility.
- */
static int bpf_core_apply_relo(struct bpf_program *prog,
const struct bpf_core_relo *relo,
int relo_idx,
const struct btf *local_btf,
struct hashmap *cand_cache)
{
- struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
const void *type_key = u32_as_hash_key(relo->type_id);
- struct bpf_core_relo_res cand_res, targ_res;
+ struct bpf_core_cand_list *cands = NULL;
+ const char *prog_name = prog->name;
const struct btf_type *local_type;
const char *local_name;
- struct core_cand_list *cands = NULL;
- __u32 local_id;
- const char *spec_str;
- int i, j, err;
+ __u32 local_id = relo->type_id;
+ struct bpf_insn *insn;
+ int insn_idx, err;
+
+ if (relo->insn_off % BPF_INSN_SZ)
+ return -EINVAL;
+ insn_idx = relo->insn_off / BPF_INSN_SZ;
+ /* adjust insn_idx from section frame of reference to the local
+ * program's frame of reference; (sub-)program code is not yet
+ * relocated, so it's enough to just subtract in-section offset
+ */
+ insn_idx = insn_idx - prog->sec_insn_off;
+ if (insn_idx > prog->insns_cnt)
+ return -EINVAL;
+ insn = &prog->insns[insn_idx];
- local_id = relo->type_id;
local_type = btf__type_by_id(local_btf, local_id);
if (!local_type)
return -EINVAL;
@@ -6205,51 +5144,19 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
if (!local_name)
return -EINVAL;
- spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
- if (str_is_empty(spec_str))
- return -EINVAL;
-
if (prog->obj->gen_loader) {
- pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n",
+ pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n",
prog - prog->obj->programs, relo->insn_off / 8,
- local_name, spec_str, relo->kind);
+ local_name, relo->kind);
return -ENOTSUP;
}
- err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
- if (err) {
- pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
- prog->name, relo_idx, local_id, btf_kind_str(local_type),
- str_is_empty(local_name) ? "<anon>" : local_name,
- spec_str, err);
- return -EINVAL;
- }
-
- pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name,
- relo_idx, core_relo_kind_str(relo->kind), relo->kind);
- bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
- libbpf_print(LIBBPF_DEBUG, "\n");
- /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
- if (relo->kind == BPF_TYPE_ID_LOCAL) {
- targ_res.validate = true;
- targ_res.poison = false;
- targ_res.orig_val = local_spec.root_type_id;
- targ_res.new_val = local_spec.root_type_id;
- goto patch_insn;
- }
-
- /* libbpf doesn't support candidate search for anonymous types */
- if (str_is_empty(spec_str)) {
- pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
- prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
- return -EOPNOTSUPP;
- }
-
- if (!hashmap__find(cand_cache, type_key, (void **)&cands)) {
+ if (relo->kind != BPF_TYPE_ID_LOCAL &&
+ !hashmap__find(cand_cache, type_key, (void **)&cands)) {
cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
if (IS_ERR(cands)) {
pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
- prog->name, relo_idx, local_id, btf_kind_str(local_type),
+ prog_name, relo_idx, local_id, btf_kind_str(local_type),
local_name, PTR_ERR(cands));
return PTR_ERR(cands);
}
@@ -6260,97 +5167,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
}
}
- for (i = 0, j = 0; i < cands->len; i++) {
- err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
- cands->cands[i].id, &cand_spec);
- if (err < 0) {
- pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
- prog->name, relo_idx, i);
- bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
- libbpf_print(LIBBPF_WARN, ": %d\n", err);
- return err;
- }
-
- pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name,
- relo_idx, err == 0 ? "non-matching" : "matching", i);
- bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
- libbpf_print(LIBBPF_DEBUG, "\n");
-
- if (err == 0)
- continue;
-
- err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
- if (err)
- return err;
-
- if (j == 0) {
- targ_res = cand_res;
- targ_spec = cand_spec;
- } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
- /* if there are many field relo candidates, they
- * should all resolve to the same bit offset
- */
- pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
- prog->name, relo_idx, cand_spec.bit_offset,
- targ_spec.bit_offset);
- return -EINVAL;
- } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
- /* all candidates should result in the same relocation
- * decision and value, otherwise it's dangerous to
- * proceed due to ambiguity
- */
- pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
- prog->name, relo_idx,
- cand_res.poison ? "failure" : "success", cand_res.new_val,
- targ_res.poison ? "failure" : "success", targ_res.new_val);
- return -EINVAL;
- }
-
- cands->cands[j++] = cands->cands[i];
- }
-
- /*
- * For BPF_FIELD_EXISTS relo or when used BPF program has field
- * existence checks or kernel version/config checks, it's expected
- * that we might not find any candidates. In this case, if field
- * wasn't found in any candidate, the list of candidates shouldn't
- * change at all, we'll just handle relocating appropriately,
- * depending on relo's kind.
- */
- if (j > 0)
- cands->len = j;
-
- /*
- * If no candidates were found, it might be both a programmer error,
- * as well as expected case, depending whether instruction w/
- * relocation is guarded in some way that makes it unreachable (dead
- * code) if relocation can't be resolved. This is handled in
- * bpf_core_patch_insn() uniformly by replacing that instruction with
- * BPF helper call insn (using invalid helper ID). If that instruction
- * is indeed unreachable, then it will be ignored and eliminated by
- * verifier. If it was an error, then verifier will complain and point
- * to a specific instruction number in its log.
- */
- if (j == 0) {
- pr_debug("prog '%s': relo #%d: no matching targets found\n",
- prog->name, relo_idx);
-
- /* calculate single target relo result explicitly */
- err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res);
- if (err)
- return err;
- }
-
-patch_insn:
- /* bpf_core_patch_insn() should know how to handle missing targ_spec */
- err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
- if (err) {
- pr_warn("prog '%s': relo #%d: failed to patch insn #%zu: %d\n",
- prog->name, relo_idx, relo->insn_off / BPF_INSN_SZ, err);
- return -EINVAL;
- }
-
- return 0;
+ return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands);
}
static int
@@ -6496,11 +5313,11 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
}
insn[1].imm = ext->kcfg.data_off;
} else /* EXT_KSYM */ {
- if (ext->ksym.type_id) { /* typed ksyms */
+ if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
insn[0].src_reg = BPF_PSEUDO_BTF_ID;
insn[0].imm = ext->ksym.kernel_btf_id;
insn[1].imm = ext->ksym.kernel_btf_obj_fd;
- } else { /* typeless ksyms */
+ } else { /* typeless ksyms or unresolved typed ksyms */
insn[0].imm = (__u32)ext->ksym.addr;
insn[1].imm = ext->ksym.addr >> 32;
}
@@ -7190,7 +6007,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj)
for (i = 0; i < obj->nr_programs; i++) {
struct bpf_program *p = &obj->programs[i];
-
+
if (!p->nr_reloc)
continue;
@@ -7554,7 +6371,7 @@ static struct bpf_object *
__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts)
{
- const char *obj_name, *kconfig;
+ const char *obj_name, *kconfig, *btf_tmp_path;
struct bpf_program *prog;
struct bpf_object *obj;
char tmp_name[64];
@@ -7585,11 +6402,26 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
if (IS_ERR(obj))
return obj;
+ btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
+ if (btf_tmp_path) {
+ if (strlen(btf_tmp_path) >= PATH_MAX) {
+ err = -ENAMETOOLONG;
+ goto out;
+ }
+ obj->btf_custom_path = strdup(btf_tmp_path);
+ if (!obj->btf_custom_path) {
+ err = -ENOMEM;
+ goto out;
+ }
+ }
+
kconfig = OPTS_GET(opts, kconfig, NULL);
if (kconfig) {
obj->kconfig = strdup(kconfig);
- if (!obj->kconfig)
- return ERR_PTR(-ENOMEM);
+ if (!obj->kconfig) {
+ err = -ENOMEM;
+ goto out;
+ }
}
err = bpf_object__elf_init(obj);
@@ -7812,11 +6644,8 @@ static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
break;
}
}
- if (id <= 0) {
- pr_warn("extern (%s ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
- __btf_kind_str(kind), ksym_name);
+ if (id <= 0)
return -ESRCH;
- }
*res_btf = btf;
*res_btf_fd = btf_fd;
@@ -7833,8 +6662,13 @@ static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
struct btf *btf = NULL;
id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd);
- if (id < 0)
+ if (id == -ESRCH && ext->is_weak) {
+ return 0;
+ } else if (id < 0) {
+ pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
+ ext->name);
return id;
+ }
/* find local type_id */
local_type_id = ext->ksym.type_id;
@@ -8055,7 +6889,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
err = err ? : bpf_object__sanitize_maps(obj);
err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
err = err ? : bpf_object__create_maps(obj);
- err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
+ err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path);
err = err ? : bpf_object__load_progs(obj, attr->log_level);
if (obj->gen_loader) {
@@ -8450,6 +7284,11 @@ const char *bpf_map__get_pin_path(const struct bpf_map *map)
return map->pin_path;
}
+const char *bpf_map__pin_path(const struct bpf_map *map)
+{
+ return map->pin_path;
+}
+
bool bpf_map__is_pinned(const struct bpf_map *map)
{
return map->pinned;
@@ -8702,6 +7541,7 @@ void bpf_object__close(struct bpf_object *obj)
for (i = 0; i < obj->nr_maps; i++)
bpf_map__destroy(&obj->maps[i]);
+ zfree(&obj->btf_custom_path);
zfree(&obj->kconfig);
zfree(&obj->externs);
obj->nr_extern = 0;
@@ -9471,7 +8311,7 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
ret = snprintf(btf_type_name, sizeof(btf_type_name),
"%s%s", prefix, name);
/* snprintf returns the number of characters written excluding the
- * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
+ * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
* indicates truncation.
*/
if (ret < 0 || ret >= sizeof(btf_type_name))
@@ -9495,7 +8335,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
struct btf *btf;
int err;
- btf = libbpf_find_kernel_btf();
+ btf = btf__load_vmlinux_btf();
err = libbpf_get_error(btf);
if (err) {
pr_warn("vmlinux BTF is not found\n");
@@ -9514,8 +8354,8 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
{
struct bpf_prog_info_linear *info_linear;
struct bpf_prog_info *info;
- struct btf *btf = NULL;
- int err = -EINVAL;
+ struct btf *btf;
+ int err;
info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
err = libbpf_get_error(info_linear);
@@ -9524,12 +8364,15 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
attach_prog_fd);
return err;
}
+
+ err = -EINVAL;
info = &info_linear->info;
if (!info->btf_id) {
pr_warn("The target program doesn't have BTF\n");
goto out;
}
- if (btf__get_from_id(info->btf_id, &btf)) {
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(btf)) {
pr_warn("Failed to get BTF of the program\n");
goto out;
}
@@ -10003,7 +8846,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
struct bpf_link {
int (*detach)(struct bpf_link *link);
- int (*destroy)(struct bpf_link *link);
+ void (*dealloc)(struct bpf_link *link);
char *pin_path; /* NULL, if not pinned */
int fd; /* hook FD, -1 if not applicable */
bool disconnected;
@@ -10013,7 +8856,7 @@ struct bpf_link {
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
{
int ret;
-
+
ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
return libbpf_err_errno(ret);
}
@@ -10042,11 +8885,12 @@ int bpf_link__destroy(struct bpf_link *link)
if (!link->disconnected && link->detach)
err = link->detach(link);
- if (link->destroy)
- link->destroy(link);
if (link->pin_path)
free(link->pin_path);
- free(link);
+ if (link->dealloc)
+ link->dealloc(link);
+ else
+ free(link);
return libbpf_err(err);
}
@@ -10143,23 +8987,42 @@ int bpf_link__unpin(struct bpf_link *link)
return 0;
}
-static int bpf_link__detach_perf_event(struct bpf_link *link)
+struct bpf_link_perf {
+ struct bpf_link link;
+ int perf_event_fd;
+};
+
+static int bpf_link_perf_detach(struct bpf_link *link)
{
- int err;
+ struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
+ int err = 0;
- err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
- if (err)
+ if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
err = -errno;
+ if (perf_link->perf_event_fd != link->fd)
+ close(perf_link->perf_event_fd);
close(link->fd);
+
return libbpf_err(err);
}
-struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
+static void bpf_link_perf_dealloc(struct bpf_link *link)
+{
+ struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
+
+ free(perf_link);
+}
+
+struct bpf_link *bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd,
+ const struct bpf_perf_event_opts *opts)
{
char errmsg[STRERR_BUFSIZE];
- struct bpf_link *link;
- int prog_fd, err;
+ struct bpf_link_perf *link;
+ int prog_fd, link_fd = -1, err;
+
+ if (!OPTS_VALID(opts, bpf_perf_event_opts))
+ return libbpf_err_ptr(-EINVAL);
if (pfd < 0) {
pr_warn("prog '%s': invalid perf event FD %d\n",
@@ -10176,27 +9039,59 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pf
link = calloc(1, sizeof(*link));
if (!link)
return libbpf_err_ptr(-ENOMEM);
- link->detach = &bpf_link__detach_perf_event;
- link->fd = pfd;
+ link->link.detach = &bpf_link_perf_detach;
+ link->link.dealloc = &bpf_link_perf_dealloc;
+ link->perf_event_fd = pfd;
- if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
- err = -errno;
- free(link);
- pr_warn("prog '%s': failed to attach to pfd %d: %s\n",
- prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- if (err == -EPROTO)
- pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
- prog->name, pfd);
- return libbpf_err_ptr(err);
+ if (kernel_supports(prog->obj, FEAT_PERF_LINK)) {
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
+ .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
+
+ link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
+ if (link_fd < 0) {
+ err = -errno;
+ pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
+ prog->name, pfd,
+ err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ goto err_out;
+ }
+ link->link.fd = link_fd;
+ } else {
+ if (OPTS_GET(opts, bpf_cookie, 0)) {
+ pr_warn("prog '%s': user context value is not supported\n", prog->name);
+ err = -EOPNOTSUPP;
+ goto err_out;
+ }
+
+ if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
+ err = -errno;
+ pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
+ prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ if (err == -EPROTO)
+ pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
+ prog->name, pfd);
+ goto err_out;
+ }
+ link->link.fd = pfd;
}
if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
err = -errno;
- free(link);
- pr_warn("prog '%s': failed to enable pfd %d: %s\n",
+ pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
- return libbpf_err_ptr(err);
+ goto err_out;
}
- return link;
+
+ return &link->link;
+err_out:
+ if (link_fd >= 0)
+ close(link_fd);
+ free(link);
+ return libbpf_err_ptr(err);
+}
+
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
+{
+ return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
}
/*
@@ -10257,13 +9152,19 @@ static int determine_uprobe_retprobe_bit(void)
return parse_uint_from_file(file, "config:%d\n");
}
+#define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
+#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
+
static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
- uint64_t offset, int pid)
+ uint64_t offset, int pid, size_t ref_ctr_off)
{
struct perf_event_attr attr = {};
char errmsg[STRERR_BUFSIZE];
int type, pfd, err;
+ if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
+ return -EINVAL;
+
type = uprobe ? determine_uprobe_perf_type()
: determine_kprobe_perf_type();
if (type < 0) {
@@ -10286,6 +9187,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
}
attr.size = sizeof(attr);
attr.type = type;
+ attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
attr.config2 = offset; /* kprobe_addr or probe_offset */
@@ -10304,23 +9206,34 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
return pfd;
}
-struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
- bool retprobe,
- const char *func_name)
+struct bpf_link *
+bpf_program__attach_kprobe_opts(struct bpf_program *prog,
+ const char *func_name,
+ const struct bpf_kprobe_opts *opts)
{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
+ unsigned long offset;
+ bool retprobe;
int pfd, err;
+ if (!OPTS_VALID(opts, bpf_kprobe_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ retprobe = OPTS_GET(opts, retprobe, false);
+ offset = OPTS_GET(opts, offset, 0);
+ pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
- 0 /* offset */, -1 /* pid */);
+ offset, -1 /* pid */, 0 /* ref_ctr_off */);
if (pfd < 0) {
pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return libbpf_err_ptr(pfd);
}
- link = bpf_program__attach_perf_event(prog, pfd);
+ link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
err = libbpf_get_error(link);
if (err) {
close(pfd);
@@ -10332,29 +9245,70 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
return link;
}
+struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
+ bool retprobe,
+ const char *func_name)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
+ .retprobe = retprobe,
+ );
+
+ return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
+}
+
static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
struct bpf_program *prog)
{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
+ unsigned long offset = 0;
+ struct bpf_link *link;
const char *func_name;
- bool retprobe;
+ char *func;
+ int n, err;
func_name = prog->sec_name + sec->len;
- retprobe = strcmp(sec->sec, "kretprobe/") == 0;
+ opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0;
+
+ n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
+ if (n < 1) {
+ err = -EINVAL;
+ pr_warn("kprobe name is invalid: %s\n", func_name);
+ return libbpf_err_ptr(err);
+ }
+ if (opts.retprobe && offset != 0) {
+ free(func);
+ err = -EINVAL;
+ pr_warn("kretprobes do not support offset specification\n");
+ return libbpf_err_ptr(err);
+ }
- return bpf_program__attach_kprobe(prog, retprobe, func_name);
+ opts.offset = offset;
+ link = bpf_program__attach_kprobe_opts(prog, func, &opts);
+ free(func);
+ return link;
}
-struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
- bool retprobe, pid_t pid,
- const char *binary_path,
- size_t func_offset)
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid,
+ const char *binary_path, size_t func_offset,
+ const struct bpf_uprobe_opts *opts)
{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
+ size_t ref_ctr_off;
int pfd, err;
+ bool retprobe;
+
+ if (!OPTS_VALID(opts, bpf_uprobe_opts))
+ return libbpf_err_ptr(-EINVAL);
- pfd = perf_event_open_probe(true /* uprobe */, retprobe,
- binary_path, func_offset, pid);
+ retprobe = OPTS_GET(opts, retprobe, false);
+ ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
+ pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
+ pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
+ func_offset, pid, ref_ctr_off);
if (pfd < 0) {
pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
prog->name, retprobe ? "uretprobe" : "uprobe",
@@ -10362,7 +9316,7 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return libbpf_err_ptr(pfd);
}
- link = bpf_program__attach_perf_event(prog, pfd);
+ link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
err = libbpf_get_error(link);
if (err) {
close(pfd);
@@ -10375,6 +9329,16 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
return link;
}
+struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
+ bool retprobe, pid_t pid,
+ const char *binary_path,
+ size_t func_offset)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
+
+ return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
+}
+
static int determine_tracepoint_id(const char *tp_category,
const char *tp_name)
{
@@ -10425,14 +9389,21 @@ static int perf_event_open_tracepoint(const char *tp_category,
return pfd;
}
-struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
- const char *tp_category,
- const char *tp_name)
+struct bpf_link *bpf_program__attach_tracepoint_opts(struct bpf_program *prog,
+ const char *tp_category,
+ const char *tp_name,
+ const struct bpf_tracepoint_opts *opts)
{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
char errmsg[STRERR_BUFSIZE];
struct bpf_link *link;
int pfd, err;
+ if (!OPTS_VALID(opts, bpf_tracepoint_opts))
+ return libbpf_err_ptr(-EINVAL);
+
+ pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
+
pfd = perf_event_open_tracepoint(tp_category, tp_name);
if (pfd < 0) {
pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
@@ -10440,7 +9411,7 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return libbpf_err_ptr(pfd);
}
- link = bpf_program__attach_perf_event(prog, pfd);
+ link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
err = libbpf_get_error(link);
if (err) {
close(pfd);
@@ -10452,6 +9423,13 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
return link;
}
+struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
+ const char *tp_category,
+ const char *tp_name)
+{
+ return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
+}
+
static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
struct bpf_program *prog)
{
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 6e61342ba56c..f177d897c5f7 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -94,8 +94,15 @@ struct bpf_object_open_opts {
* system Kconfig for CONFIG_xxx externs.
*/
const char *kconfig;
+ /* Path to the custom BTF to be used for BPF CO-RE relocations.
+ * This custom BTF completely replaces the use of vmlinux BTF
+ * for the purpose of CO-RE relocations.
+ * NOTE: any other BPF feature (e.g., fentry/fexit programs,
+ * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux.
+ */
+ const char *btf_custom_path;
};
-#define bpf_object_open_opts__last_field kconfig
+#define bpf_object_open_opts__last_field btf_custom_path
LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
LIBBPF_API struct bpf_object *
@@ -237,20 +244,86 @@ LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
LIBBPF_API struct bpf_link *
bpf_program__attach(struct bpf_program *prog);
+
+struct bpf_perf_event_opts {
+ /* size of this struct, for forward/backward compatiblity */
+ size_t sz;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+};
+#define bpf_perf_event_opts__last_field bpf_cookie
+
LIBBPF_API struct bpf_link *
bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd,
+ const struct bpf_perf_event_opts *opts);
+
+struct bpf_kprobe_opts {
+ /* size of this struct, for forward/backward compatiblity */
+ size_t sz;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+ /* function's offset to install kprobe to */
+ unsigned long offset;
+ /* kprobe is return probe */
+ bool retprobe;
+ size_t :0;
+};
+#define bpf_kprobe_opts__last_field retprobe
+
LIBBPF_API struct bpf_link *
bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
const char *func_name);
LIBBPF_API struct bpf_link *
+bpf_program__attach_kprobe_opts(struct bpf_program *prog,
+ const char *func_name,
+ const struct bpf_kprobe_opts *opts);
+
+struct bpf_uprobe_opts {
+ /* size of this struct, for forward/backward compatiblity */
+ size_t sz;
+ /* offset of kernel reference counted USDT semaphore, added in
+ * a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe")
+ */
+ size_t ref_ctr_offset;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+ /* uprobe is return probe, invoked at function return time */
+ bool retprobe;
+ size_t :0;
+};
+#define bpf_uprobe_opts__last_field retprobe
+
+LIBBPF_API struct bpf_link *
bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
pid_t pid, const char *binary_path,
size_t func_offset);
LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid,
+ const char *binary_path, size_t func_offset,
+ const struct bpf_uprobe_opts *opts);
+
+struct bpf_tracepoint_opts {
+ /* size of this struct, for forward/backward compatiblity */
+ size_t sz;
+ /* custom user-provided value fetchable through bpf_get_attach_cookie() */
+ __u64 bpf_cookie;
+};
+#define bpf_tracepoint_opts__last_field bpf_cookie
+
+LIBBPF_API struct bpf_link *
bpf_program__attach_tracepoint(struct bpf_program *prog,
const char *tp_category,
const char *tp_name);
LIBBPF_API struct bpf_link *
+bpf_program__attach_tracepoint_opts(struct bpf_program *prog,
+ const char *tp_category,
+ const char *tp_name,
+ const struct bpf_tracepoint_opts *opts);
+
+LIBBPF_API struct bpf_link *
bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
const char *tp_name);
LIBBPF_API struct bpf_link *
@@ -477,6 +550,7 @@ LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
+LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map);
LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 944c99d1ded3..bbc53bb25f68 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -371,7 +371,18 @@ LIBBPF_0.4.0 {
LIBBPF_0.5.0 {
global:
bpf_map__initial_value;
+ bpf_map__pin_path;
bpf_map_lookup_and_delete_elem_flags;
+ bpf_program__attach_kprobe_opts;
+ bpf_program__attach_perf_event_opts;
+ bpf_program__attach_tracepoint_opts;
+ bpf_program__attach_uprobe_opts;
bpf_object__gen_loader;
+ btf__load_from_kernel_by_id;
+ btf__load_from_kernel_by_id_split;
+ btf__load_into_kernel;
+ btf__load_module_btf;
+ btf__load_vmlinux_btf;
+ btf_dump__dump_type_data;
libbpf_set_strict_mode;
} LIBBPF_0.4.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 016ca7cb4f8a..533b0211f40a 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -14,6 +14,7 @@
#include <errno.h>
#include <linux/err.h>
#include "libbpf_legacy.h"
+#include "relo_core.h"
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -195,6 +196,17 @@ void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
size_t cur_cnt, size_t max_cnt, size_t add_cnt);
int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
+static inline bool libbpf_is_mem_zeroed(const char *p, ssize_t len)
+{
+ while (len > 0) {
+ if (*p)
+ return false;
+ p++;
+ len--;
+ }
+ return true;
+}
+
static inline bool libbpf_validate_opts(const char *opts,
size_t opts_sz, size_t user_sz,
const char *type_name)
@@ -203,16 +215,9 @@ static inline bool libbpf_validate_opts(const char *opts,
pr_warn("%s size (%zu) is too small\n", type_name, user_sz);
return false;
}
- if (user_sz > opts_sz) {
- size_t i;
-
- for (i = opts_sz; i < user_sz; i++) {
- if (opts[i]) {
- pr_warn("%s has non-zero extra bytes\n",
- type_name);
- return false;
- }
- }
+ if (!libbpf_is_mem_zeroed(opts + opts_sz, (ssize_t)user_sz - opts_sz)) {
+ pr_warn("%s has non-zero extra bytes\n", type_name);
+ return false;
}
return true;
}
@@ -232,6 +237,14 @@ static inline bool libbpf_validate_opts(const char *opts,
(opts)->field = value; \
} while (0)
+#define OPTS_ZEROED(opts, last_nonzero_field) \
+({ \
+ ssize_t __off = offsetofend(typeof(*(opts)), last_nonzero_field); \
+ !(opts) || libbpf_is_mem_zeroed((const void *)opts + __off, \
+ (opts)->sz - __off); \
+})
+
+
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
@@ -366,76 +379,6 @@ struct bpf_line_info_min {
__u32 line_col;
};
-/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
- * has to be adjusted by relocations.
- */
-enum bpf_core_relo_kind {
- BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
- BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */
- BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
- BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
- BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
- BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
- BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
- BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */
- BPF_TYPE_EXISTS = 8, /* type existence in target kernel */
- BPF_TYPE_SIZE = 9, /* type size in bytes */
- BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
- BPF_ENUMVAL_VALUE = 11, /* enum value integer value */
-};
-
-/* The minimum bpf_core_relo checked by the loader
- *
- * CO-RE relocation captures the following data:
- * - insn_off - instruction offset (in bytes) within a BPF program that needs
- * its insn->imm field to be relocated with actual field info;
- * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
- * type or field;
- * - access_str_off - offset into corresponding .BTF string section. String
- * interpretation depends on specific relocation kind:
- * - for field-based relocations, string encodes an accessed field using
- * a sequence of field and array indices, separated by colon (:). It's
- * conceptually very close to LLVM's getelementptr ([0]) instruction's
- * arguments for identifying offset to a field.
- * - for type-based relocations, strings is expected to be just "0";
- * - for enum value-based relocations, string contains an index of enum
- * value within its enum type;
- *
- * Example to provide a better feel.
- *
- * struct sample {
- * int a;
- * struct {
- * int b[10];
- * };
- * };
- *
- * struct sample *s = ...;
- * int x = &s->a; // encoded as "0:0" (a is field #0)
- * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1,
- * // b is field #0 inside anon struct, accessing elem #5)
- * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
- *
- * type_id for all relocs in this example will capture BTF type id of
- * `struct sample`.
- *
- * Such relocation is emitted when using __builtin_preserve_access_index()
- * Clang built-in, passing expression that captures field address, e.g.:
- *
- * bpf_probe_read(&dst, sizeof(dst),
- * __builtin_preserve_access_index(&src->a.b.c));
- *
- * In this case Clang will emit field relocation recording necessary data to
- * be able to find offset of embedded `a.b.c` field within `src` struct.
- *
- * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
- */
-struct bpf_core_relo {
- __u32 insn_off;
- __u32 type_id;
- __u32 access_str_off;
- enum bpf_core_relo_kind kind;
-};
typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx);
typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx);
@@ -494,4 +437,14 @@ static inline void *libbpf_ptr(void *ret)
return ret;
}
+static inline bool str_is_empty(const char *s)
+{
+ return !s || !s[0];
+}
+
+static inline bool is_ldimm64_insn(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
new file mode 100644
index 000000000000..4016ed492d0c
--- /dev/null
+++ b/tools/lib/bpf/relo_core.c
@@ -0,0 +1,1295 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2019 Facebook */
+
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <ctype.h>
+#include <linux/err.h>
+
+#include "libbpf.h"
+#include "bpf.h"
+#include "btf.h"
+#include "str_error.h"
+#include "libbpf_internal.h"
+
+#define BPF_CORE_SPEC_MAX_LEN 64
+
+/* represents BPF CO-RE field or array element accessor */
+struct bpf_core_accessor {
+ __u32 type_id; /* struct/union type or array element type */
+ __u32 idx; /* field index or array index */
+ const char *name; /* field name or NULL for array accessor */
+};
+
+struct bpf_core_spec {
+ const struct btf *btf;
+ /* high-level spec: named fields and array indices only */
+ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+ /* original unresolved (no skip_mods_or_typedefs) root type ID */
+ __u32 root_type_id;
+ /* CO-RE relocation kind */
+ enum bpf_core_relo_kind relo_kind;
+ /* high-level spec length */
+ int len;
+ /* raw, low-level spec: 1-to-1 with accessor spec string */
+ int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+ /* raw spec length */
+ int raw_len;
+ /* field bit offset represented by spec */
+ __u32 bit_offset;
+};
+
+static bool is_flex_arr(const struct btf *btf,
+ const struct bpf_core_accessor *acc,
+ const struct btf_array *arr)
+{
+ const struct btf_type *t;
+
+ /* not a flexible array, if not inside a struct or has non-zero size */
+ if (!acc->name || arr->nelems > 0)
+ return false;
+
+ /* has to be the last member of enclosing struct */
+ t = btf__type_by_id(btf, acc->type_id);
+ return acc->idx == btf_vlen(t) - 1;
+}
+
+static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_FIELD_BYTE_OFFSET: return "byte_off";
+ case BPF_FIELD_BYTE_SIZE: return "byte_sz";
+ case BPF_FIELD_EXISTS: return "field_exists";
+ case BPF_FIELD_SIGNED: return "signed";
+ case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
+ case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
+ case BPF_TYPE_ID_LOCAL: return "local_type_id";
+ case BPF_TYPE_ID_TARGET: return "target_type_id";
+ case BPF_TYPE_EXISTS: return "type_exists";
+ case BPF_TYPE_SIZE: return "type_size";
+ case BPF_ENUMVAL_EXISTS: return "enumval_exists";
+ case BPF_ENUMVAL_VALUE: return "enumval_value";
+ default: return "unknown";
+ }
+}
+
+static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_FIELD_BYTE_OFFSET:
+ case BPF_FIELD_BYTE_SIZE:
+ case BPF_FIELD_EXISTS:
+ case BPF_FIELD_SIGNED:
+ case BPF_FIELD_LSHIFT_U64:
+ case BPF_FIELD_RSHIFT_U64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_TYPE_ID_LOCAL:
+ case BPF_TYPE_ID_TARGET:
+ case BPF_TYPE_EXISTS:
+ case BPF_TYPE_SIZE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
+{
+ switch (kind) {
+ case BPF_ENUMVAL_EXISTS:
+ case BPF_ENUMVAL_VALUE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * Turn bpf_core_relo into a low- and high-level spec representation,
+ * validating correctness along the way, as well as calculating resulting
+ * field bit offset, specified by accessor string. Low-level spec captures
+ * every single level of nestedness, including traversing anonymous
+ * struct/union members. High-level one only captures semantically meaningful
+ * "turning points": named fields and array indicies.
+ * E.g., for this case:
+ *
+ * struct sample {
+ * int __unimportant;
+ * struct {
+ * int __1;
+ * int __2;
+ * int a[7];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ *
+ * int x = &s->a[3]; // access string = '0:1:2:3'
+ *
+ * Low-level spec has 1:1 mapping with each element of access string (it's
+ * just a parsed access string representation): [0, 1, 2, 3].
+ *
+ * High-level spec will capture only 3 points:
+ * - intial zero-index access by pointer (&s->... is the same as &s[0]...);
+ * - field 'a' access (corresponds to '2' in low-level spec);
+ * - array element #3 access (corresponds to '3' in low-level spec).
+ *
+ * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
+ * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
+ * spec and raw_spec are kept empty.
+ *
+ * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
+ * string to specify enumerator's value index that need to be relocated.
+ */
+static int bpf_core_parse_spec(const struct btf *btf,
+ __u32 type_id,
+ const char *spec_str,
+ enum bpf_core_relo_kind relo_kind,
+ struct bpf_core_spec *spec)
+{
+ int access_idx, parsed_len, i;
+ struct bpf_core_accessor *acc;
+ const struct btf_type *t;
+ const char *name;
+ __u32 id;
+ __s64 sz;
+
+ if (str_is_empty(spec_str) || *spec_str == ':')
+ return -EINVAL;
+
+ memset(spec, 0, sizeof(*spec));
+ spec->btf = btf;
+ spec->root_type_id = type_id;
+ spec->relo_kind = relo_kind;
+
+ /* type-based relocations don't have a field access string */
+ if (core_relo_is_type_based(relo_kind)) {
+ if (strcmp(spec_str, "0"))
+ return -EINVAL;
+ return 0;
+ }
+
+ /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
+ while (*spec_str) {
+ if (*spec_str == ':')
+ ++spec_str;
+ if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
+ return -EINVAL;
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+ spec_str += parsed_len;
+ spec->raw_spec[spec->raw_len++] = access_idx;
+ }
+
+ if (spec->raw_len == 0)
+ return -EINVAL;
+
+ t = skip_mods_and_typedefs(btf, type_id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[0];
+ acc = &spec->spec[0];
+ acc->type_id = id;
+ acc->idx = access_idx;
+ spec->len++;
+
+ if (core_relo_is_enumval_based(relo_kind)) {
+ if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
+ return -EINVAL;
+
+ /* record enumerator name in a first accessor */
+ acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
+ return 0;
+ }
+
+ if (!core_relo_is_field_based(relo_kind))
+ return -EINVAL;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->bit_offset = access_idx * sz * 8;
+
+ for (i = 1; i < spec->raw_len; i++) {
+ t = skip_mods_and_typedefs(btf, id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[i];
+ acc = &spec->spec[spec->len];
+
+ if (btf_is_composite(t)) {
+ const struct btf_member *m;
+ __u32 bit_offset;
+
+ if (access_idx >= btf_vlen(t))
+ return -EINVAL;
+
+ bit_offset = btf_member_bit_offset(t, access_idx);
+ spec->bit_offset += bit_offset;
+
+ m = btf_members(t) + access_idx;
+ if (m->name_off) {
+ name = btf__name_by_offset(btf, m->name_off);
+ if (str_is_empty(name))
+ return -EINVAL;
+
+ acc->type_id = id;
+ acc->idx = access_idx;
+ acc->name = name;
+ spec->len++;
+ }
+
+ id = m->type;
+ } else if (btf_is_array(t)) {
+ const struct btf_array *a = btf_array(t);
+ bool flex;
+
+ t = skip_mods_and_typedefs(btf, a->type, &id);
+ if (!t)
+ return -EINVAL;
+
+ flex = is_flex_arr(btf, acc - 1, a);
+ if (!flex && access_idx >= a->nelems)
+ return -EINVAL;
+
+ spec->spec[spec->len].type_id = id;
+ spec->spec[spec->len].idx = access_idx;
+ spec->len++;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->bit_offset += access_idx * sz * 8;
+ } else {
+ pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
+ type_id, spec_str, i, id, btf_kind_str(t));
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/* Check two types for compatibility for the purpose of field access
+ * relocation. const/volatile/restrict and typedefs are skipped to ensure we
+ * are relocating semantically compatible entities:
+ * - any two STRUCTs/UNIONs are compatible and can be mixed;
+ * - any two FWDs are compatible, if their names match (modulo flavor suffix);
+ * - any two PTRs are always compatible;
+ * - for ENUMs, names should be the same (ignoring flavor suffix) or at
+ * least one of enums should be anonymous;
+ * - for ENUMs, check sizes, names are ignored;
+ * - for INT, size and signedness are ignored;
+ * - any two FLOATs are always compatible;
+ * - for ARRAY, dimensionality is ignored, element types are checked for
+ * compatibility recursively;
+ * - everything else shouldn't be ever a target of relocation.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_fields_are_compat(const struct btf *local_btf,
+ __u32 local_id,
+ const struct btf *targ_btf,
+ __u32 targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+
+recur:
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (btf_is_composite(local_type) && btf_is_composite(targ_type))
+ return 1;
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+
+ switch (btf_kind(local_type)) {
+ case BTF_KIND_PTR:
+ case BTF_KIND_FLOAT:
+ return 1;
+ case BTF_KIND_FWD:
+ case BTF_KIND_ENUM: {
+ const char *local_name, *targ_name;
+ size_t local_len, targ_len;
+
+ local_name = btf__name_by_offset(local_btf,
+ local_type->name_off);
+ targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
+ local_len = bpf_core_essential_name_len(local_name);
+ targ_len = bpf_core_essential_name_len(targ_name);
+ /* one of them is anonymous or both w/ same flavor-less names */
+ return local_len == 0 || targ_len == 0 ||
+ (local_len == targ_len &&
+ strncmp(local_name, targ_name, local_len) == 0);
+ }
+ case BTF_KIND_INT:
+ /* just reject deprecated bitfield-like integers; all other
+ * integers are by default compatible between each other
+ */
+ return btf_int_offset(local_type) == 0 &&
+ btf_int_offset(targ_type) == 0;
+ case BTF_KIND_ARRAY:
+ local_id = btf_array(local_type)->type;
+ targ_id = btf_array(targ_type)->type;
+ goto recur;
+ default:
+ pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
+ btf_kind(local_type), local_id, targ_id);
+ return 0;
+ }
+}
+
+/*
+ * Given single high-level named field accessor in local type, find
+ * corresponding high-level accessor for a target type. Along the way,
+ * maintain low-level spec for target as well. Also keep updating target
+ * bit offset.
+ *
+ * Searching is performed through recursive exhaustive enumeration of all
+ * fields of a struct/union. If there are any anonymous (embedded)
+ * structs/unions, they are recursively searched as well. If field with
+ * desired name is found, check compatibility between local and target types,
+ * before returning result.
+ *
+ * 1 is returned, if field is found.
+ * 0 is returned if no compatible field is found.
+ * <0 is returned on error.
+ */
+static int bpf_core_match_member(const struct btf *local_btf,
+ const struct bpf_core_accessor *local_acc,
+ const struct btf *targ_btf,
+ __u32 targ_id,
+ struct bpf_core_spec *spec,
+ __u32 *next_targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+ const struct btf_member *local_member, *m;
+ const char *local_name, *targ_name;
+ __u32 local_id;
+ int i, n, found;
+
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+ if (!btf_is_composite(targ_type))
+ return 0;
+
+ local_id = local_acc->type_id;
+ local_type = btf__type_by_id(local_btf, local_id);
+ local_member = btf_members(local_type) + local_acc->idx;
+ local_name = btf__name_by_offset(local_btf, local_member->name_off);
+
+ n = btf_vlen(targ_type);
+ m = btf_members(targ_type);
+ for (i = 0; i < n; i++, m++) {
+ __u32 bit_offset;
+
+ bit_offset = btf_member_bit_offset(targ_type, i);
+
+ /* too deep struct/union/array nesting */
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ /* speculate this member will be the good one */
+ spec->bit_offset += bit_offset;
+ spec->raw_spec[spec->raw_len++] = i;
+
+ targ_name = btf__name_by_offset(targ_btf, m->name_off);
+ if (str_is_empty(targ_name)) {
+ /* embedded struct/union, we need to go deeper */
+ found = bpf_core_match_member(local_btf, local_acc,
+ targ_btf, m->type,
+ spec, next_targ_id);
+ if (found) /* either found or error */
+ return found;
+ } else if (strcmp(local_name, targ_name) == 0) {
+ /* matching named field */
+ struct bpf_core_accessor *targ_acc;
+
+ targ_acc = &spec->spec[spec->len++];
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = i;
+ targ_acc->name = targ_name;
+
+ *next_targ_id = m->type;
+ found = bpf_core_fields_are_compat(local_btf,
+ local_member->type,
+ targ_btf, m->type);
+ if (!found)
+ spec->len--; /* pop accessor */
+ return found;
+ }
+ /* member turned out not to be what we looked for */
+ spec->bit_offset -= bit_offset;
+ spec->raw_len--;
+ }
+
+ return 0;
+}
+
+/*
+ * Try to match local spec to a target type and, if successful, produce full
+ * target spec (high-level, low-level + bit offset).
+ */
+static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
+ const struct btf *targ_btf, __u32 targ_id,
+ struct bpf_core_spec *targ_spec)
+{
+ const struct btf_type *targ_type;
+ const struct bpf_core_accessor *local_acc;
+ struct bpf_core_accessor *targ_acc;
+ int i, sz, matched;
+
+ memset(targ_spec, 0, sizeof(*targ_spec));
+ targ_spec->btf = targ_btf;
+ targ_spec->root_type_id = targ_id;
+ targ_spec->relo_kind = local_spec->relo_kind;
+
+ if (core_relo_is_type_based(local_spec->relo_kind)) {
+ return bpf_core_types_are_compat(local_spec->btf,
+ local_spec->root_type_id,
+ targ_btf, targ_id);
+ }
+
+ local_acc = &local_spec->spec[0];
+ targ_acc = &targ_spec->spec[0];
+
+ if (core_relo_is_enumval_based(local_spec->relo_kind)) {
+ size_t local_essent_len, targ_essent_len;
+ const struct btf_enum *e;
+ const char *targ_name;
+
+ /* has to resolve to an enum */
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
+ if (!btf_is_enum(targ_type))
+ return 0;
+
+ local_essent_len = bpf_core_essential_name_len(local_acc->name);
+
+ for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
+ targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
+ targ_essent_len = bpf_core_essential_name_len(targ_name);
+ if (targ_essent_len != local_essent_len)
+ continue;
+ if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = i;
+ targ_acc->name = targ_name;
+ targ_spec->len++;
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+ targ_spec->raw_len++;
+ return 1;
+ }
+ }
+ return 0;
+ }
+
+ if (!core_relo_is_field_based(local_spec->relo_kind))
+ return -EINVAL;
+
+ for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
+ &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+
+ if (local_acc->name) {
+ matched = bpf_core_match_member(local_spec->btf,
+ local_acc,
+ targ_btf, targ_id,
+ targ_spec, &targ_id);
+ if (matched <= 0)
+ return matched;
+ } else {
+ /* for i=0, targ_id is already treated as array element
+ * type (because it's the original struct), for others
+ * we should find array element type first
+ */
+ if (i > 0) {
+ const struct btf_array *a;
+ bool flex;
+
+ if (!btf_is_array(targ_type))
+ return 0;
+
+ a = btf_array(targ_type);
+ flex = is_flex_arr(targ_btf, targ_acc - 1, a);
+ if (!flex && local_acc->idx >= a->nelems)
+ return 0;
+ if (!skip_mods_and_typedefs(targ_btf, a->type,
+ &targ_id))
+ return -EINVAL;
+ }
+
+ /* too deep struct/union/array nesting */
+ if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = local_acc->idx;
+ targ_acc->name = NULL;
+ targ_spec->len++;
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+ targ_spec->raw_len++;
+
+ sz = btf__resolve_size(targ_btf, targ_id);
+ if (sz < 0)
+ return sz;
+ targ_spec->bit_offset += local_acc->idx * sz * 8;
+ }
+ }
+
+ return 1;
+}
+
+static int bpf_core_calc_field_relo(const char *prog_name,
+ const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u32 *val, __u32 *field_sz, __u32 *type_id,
+ bool *validate)
+{
+ const struct bpf_core_accessor *acc;
+ const struct btf_type *t;
+ __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
+ const struct btf_member *m;
+ const struct btf_type *mt;
+ bool bitfield;
+ __s64 sz;
+
+ *field_sz = 0;
+
+ if (relo->kind == BPF_FIELD_EXISTS) {
+ *val = spec ? 1 : 0;
+ return 0;
+ }
+
+ if (!spec)
+ return -EUCLEAN; /* request instruction poisoning */
+
+ acc = &spec->spec[spec->len - 1];
+ t = btf__type_by_id(spec->btf, acc->type_id);
+
+ /* a[n] accessor needs special handling */
+ if (!acc->name) {
+ if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
+ *val = spec->bit_offset / 8;
+ /* remember field size for load/store mem size */
+ sz = btf__resolve_size(spec->btf, acc->type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *field_sz = sz;
+ *type_id = acc->type_id;
+ } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
+ sz = btf__resolve_size(spec->btf, acc->type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *val = sz;
+ } else {
+ pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
+ prog_name, relo->kind, relo->insn_off / 8);
+ return -EINVAL;
+ }
+ if (validate)
+ *validate = true;
+ return 0;
+ }
+
+ m = btf_members(t) + acc->idx;
+ mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
+ bit_off = spec->bit_offset;
+ bit_sz = btf_member_bitfield_size(t, acc->idx);
+
+ bitfield = bit_sz > 0;
+ if (bitfield) {
+ byte_sz = mt->size;
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
+ /* figure out smallest int size necessary for bitfield load */
+ while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
+ if (byte_sz >= 8) {
+ /* bitfield can't be read with 64-bit read */
+ pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
+ prog_name, relo->kind, relo->insn_off / 8);
+ return -E2BIG;
+ }
+ byte_sz *= 2;
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
+ }
+ } else {
+ sz = btf__resolve_size(spec->btf, field_type_id);
+ if (sz < 0)
+ return -EINVAL;
+ byte_sz = sz;
+ byte_off = spec->bit_offset / 8;
+ bit_sz = byte_sz * 8;
+ }
+
+ /* for bitfields, all the relocatable aspects are ambiguous and we
+ * might disagree with compiler, so turn off validation of expected
+ * value, except for signedness
+ */
+ if (validate)
+ *validate = !bitfield;
+
+ switch (relo->kind) {
+ case BPF_FIELD_BYTE_OFFSET:
+ *val = byte_off;
+ if (!bitfield) {
+ *field_sz = byte_sz;
+ *type_id = field_type_id;
+ }
+ break;
+ case BPF_FIELD_BYTE_SIZE:
+ *val = byte_sz;
+ break;
+ case BPF_FIELD_SIGNED:
+ /* enums will be assumed unsigned */
+ *val = btf_is_enum(mt) ||
+ (btf_int_encoding(mt) & BTF_INT_SIGNED);
+ if (validate)
+ *validate = true; /* signedness is never ambiguous */
+ break;
+ case BPF_FIELD_LSHIFT_U64:
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ *val = 64 - (bit_off + bit_sz - byte_off * 8);
+#else
+ *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
+#endif
+ break;
+ case BPF_FIELD_RSHIFT_U64:
+ *val = 64 - bit_sz;
+ if (validate)
+ *validate = true; /* right shift is never ambiguous */
+ break;
+ case BPF_FIELD_EXISTS:
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u32 *val)
+{
+ __s64 sz;
+
+ /* type-based relos return zero when target type is not found */
+ if (!spec) {
+ *val = 0;
+ return 0;
+ }
+
+ switch (relo->kind) {
+ case BPF_TYPE_ID_TARGET:
+ *val = spec->root_type_id;
+ break;
+ case BPF_TYPE_EXISTS:
+ *val = 1;
+ break;
+ case BPF_TYPE_SIZE:
+ sz = btf__resolve_size(spec->btf, spec->root_type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *val = sz;
+ break;
+ case BPF_TYPE_ID_LOCAL:
+ /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
+ const struct bpf_core_spec *spec,
+ __u32 *val)
+{
+ const struct btf_type *t;
+ const struct btf_enum *e;
+
+ switch (relo->kind) {
+ case BPF_ENUMVAL_EXISTS:
+ *val = spec ? 1 : 0;
+ break;
+ case BPF_ENUMVAL_VALUE:
+ if (!spec)
+ return -EUCLEAN; /* request instruction poisoning */
+ t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
+ e = btf_enum(t) + spec->spec[0].idx;
+ *val = e->val;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+struct bpf_core_relo_res
+{
+ /* expected value in the instruction, unless validate == false */
+ __u32 orig_val;
+ /* new value that needs to be patched up to */
+ __u32 new_val;
+ /* relocation unsuccessful, poison instruction, but don't fail load */
+ bool poison;
+ /* some relocations can't be validated against orig_val */
+ bool validate;
+ /* for field byte offset relocations or the forms:
+ * *(T *)(rX + <off>) = rY
+ * rX = *(T *)(rY + <off>),
+ * we remember original and resolved field size to adjust direct
+ * memory loads of pointers and integers; this is necessary for 32-bit
+ * host kernel architectures, but also allows to automatically
+ * relocate fields that were resized from, e.g., u32 to u64, etc.
+ */
+ bool fail_memsz_adjust;
+ __u32 orig_sz;
+ __u32 orig_type_id;
+ __u32 new_sz;
+ __u32 new_type_id;
+};
+
+/* Calculate original and target relocation values, given local and target
+ * specs and relocation kind. These values are calculated for each candidate.
+ * If there are multiple candidates, resulting values should all be consistent
+ * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
+ * If instruction has to be poisoned, *poison will be set to true.
+ */
+static int bpf_core_calc_relo(const char *prog_name,
+ const struct bpf_core_relo *relo,
+ int relo_idx,
+ const struct bpf_core_spec *local_spec,
+ const struct bpf_core_spec *targ_spec,
+ struct bpf_core_relo_res *res)
+{
+ int err = -EOPNOTSUPP;
+
+ res->orig_val = 0;
+ res->new_val = 0;
+ res->poison = false;
+ res->validate = true;
+ res->fail_memsz_adjust = false;
+ res->orig_sz = res->new_sz = 0;
+ res->orig_type_id = res->new_type_id = 0;
+
+ if (core_relo_is_field_based(relo->kind)) {
+ err = bpf_core_calc_field_relo(prog_name, relo, local_spec,
+ &res->orig_val, &res->orig_sz,
+ &res->orig_type_id, &res->validate);
+ err = err ?: bpf_core_calc_field_relo(prog_name, relo, targ_spec,
+ &res->new_val, &res->new_sz,
+ &res->new_type_id, NULL);
+ if (err)
+ goto done;
+ /* Validate if it's safe to adjust load/store memory size.
+ * Adjustments are performed only if original and new memory
+ * sizes differ.
+ */
+ res->fail_memsz_adjust = false;
+ if (res->orig_sz != res->new_sz) {
+ const struct btf_type *orig_t, *new_t;
+
+ orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
+ new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
+
+ /* There are two use cases in which it's safe to
+ * adjust load/store's mem size:
+ * - reading a 32-bit kernel pointer, while on BPF
+ * size pointers are always 64-bit; in this case
+ * it's safe to "downsize" instruction size due to
+ * pointer being treated as unsigned integer with
+ * zero-extended upper 32-bits;
+ * - reading unsigned integers, again due to
+ * zero-extension is preserving the value correctly.
+ *
+ * In all other cases it's incorrect to attempt to
+ * load/store field because read value will be
+ * incorrect, so we poison relocated instruction.
+ */
+ if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
+ goto done;
+ if (btf_is_int(orig_t) && btf_is_int(new_t) &&
+ btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
+ btf_int_encoding(new_t) != BTF_INT_SIGNED)
+ goto done;
+
+ /* mark as invalid mem size adjustment, but this will
+ * only be checked for LDX/STX/ST insns
+ */
+ res->fail_memsz_adjust = true;
+ }
+ } else if (core_relo_is_type_based(relo->kind)) {
+ err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
+ err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
+ } else if (core_relo_is_enumval_based(relo->kind)) {
+ err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
+ err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
+ }
+
+done:
+ if (err == -EUCLEAN) {
+ /* EUCLEAN is used to signal instruction poisoning request */
+ res->poison = true;
+ err = 0;
+ } else if (err == -EOPNOTSUPP) {
+ /* EOPNOTSUPP means unknown/unsupported relocation */
+ pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
+ prog_name, relo_idx, core_relo_kind_str(relo->kind),
+ relo->kind, relo->insn_off / 8);
+ }
+
+ return err;
+}
+
+/*
+ * Turn instruction for which CO_RE relocation failed into invalid one with
+ * distinct signature.
+ */
+static void bpf_core_poison_insn(const char *prog_name, int relo_idx,
+ int insn_idx, struct bpf_insn *insn)
+{
+ pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
+ prog_name, relo_idx, insn_idx);
+ insn->code = BPF_JMP | BPF_CALL;
+ insn->dst_reg = 0;
+ insn->src_reg = 0;
+ insn->off = 0;
+ /* if this instruction is reachable (not a dead code),
+ * verifier will complain with the following message:
+ * invalid func unknown#195896080
+ */
+ insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
+}
+
+static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
+{
+ switch (BPF_SIZE(insn->code)) {
+ case BPF_DW: return 8;
+ case BPF_W: return 4;
+ case BPF_H: return 2;
+ case BPF_B: return 1;
+ default: return -1;
+ }
+}
+
+static int insn_bytes_to_bpf_size(__u32 sz)
+{
+ switch (sz) {
+ case 8: return BPF_DW;
+ case 4: return BPF_W;
+ case 2: return BPF_H;
+ case 1: return BPF_B;
+ default: return -1;
+ }
+}
+
+/*
+ * Patch relocatable BPF instruction.
+ *
+ * Patched value is determined by relocation kind and target specification.
+ * For existence relocations target spec will be NULL if field/type is not found.
+ * Expected insn->imm value is determined using relocation kind and local
+ * spec, and is checked before patching instruction. If actual insn->imm value
+ * is wrong, bail out with error.
+ *
+ * Currently supported classes of BPF instruction are:
+ * 1. rX = <imm> (assignment with immediate operand);
+ * 2. rX += <imm> (arithmetic operations with immediate operand);
+ * 3. rX = <imm64> (load with 64-bit immediate value);
+ * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
+ * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
+ * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
+ */
+static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
+ int insn_idx, const struct bpf_core_relo *relo,
+ int relo_idx, const struct bpf_core_relo_res *res)
+{
+ __u32 orig_val, new_val;
+ __u8 class;
+
+ class = BPF_CLASS(insn->code);
+
+ if (res->poison) {
+poison:
+ /* poison second part of ldimm64 to avoid confusing error from
+ * verifier about "unknown opcode 00"
+ */
+ if (is_ldimm64_insn(insn))
+ bpf_core_poison_insn(prog_name, relo_idx, insn_idx + 1, insn + 1);
+ bpf_core_poison_insn(prog_name, relo_idx, insn_idx, insn);
+ return 0;
+ }
+
+ orig_val = res->orig_val;
+ new_val = res->new_val;
+
+ switch (class) {
+ case BPF_ALU:
+ case BPF_ALU64:
+ if (BPF_SRC(insn->code) != BPF_K)
+ return -EINVAL;
+ if (res->validate && insn->imm != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
+ prog_name, relo_idx,
+ insn_idx, insn->imm, orig_val, new_val);
+ return -EINVAL;
+ }
+ orig_val = insn->imm;
+ insn->imm = new_val;
+ pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
+ prog_name, relo_idx, insn_idx,
+ orig_val, new_val);
+ break;
+ case BPF_LDX:
+ case BPF_ST:
+ case BPF_STX:
+ if (res->validate && insn->off != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
+ prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val);
+ return -EINVAL;
+ }
+ if (new_val > SHRT_MAX) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
+ prog_name, relo_idx, insn_idx, new_val);
+ return -ERANGE;
+ }
+ if (res->fail_memsz_adjust) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
+ "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
+ prog_name, relo_idx, insn_idx);
+ goto poison;
+ }
+
+ orig_val = insn->off;
+ insn->off = new_val;
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
+ prog_name, relo_idx, insn_idx, orig_val, new_val);
+
+ if (res->new_sz != res->orig_sz) {
+ int insn_bytes_sz, insn_bpf_sz;
+
+ insn_bytes_sz = insn_bpf_size_to_bytes(insn);
+ if (insn_bytes_sz != res->orig_sz) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
+ prog_name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
+ return -EINVAL;
+ }
+
+ insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
+ if (insn_bpf_sz < 0) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
+ prog_name, relo_idx, insn_idx, res->new_sz);
+ return -EINVAL;
+ }
+
+ insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
+ prog_name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
+ }
+ break;
+ case BPF_LD: {
+ __u64 imm;
+
+ if (!is_ldimm64_insn(insn) ||
+ insn[0].src_reg != 0 || insn[0].off != 0 ||
+ insn[1].code != 0 || insn[1].dst_reg != 0 ||
+ insn[1].src_reg != 0 || insn[1].off != 0) {
+ pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
+ prog_name, relo_idx, insn_idx);
+ return -EINVAL;
+ }
+
+ imm = insn[0].imm + ((__u64)insn[1].imm << 32);
+ if (res->validate && imm != orig_val) {
+ pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
+ prog_name, relo_idx,
+ insn_idx, (unsigned long long)imm,
+ orig_val, new_val);
+ return -EINVAL;
+ }
+
+ insn[0].imm = new_val;
+ insn[1].imm = 0; /* currently only 32-bit values are supported */
+ pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
+ prog_name, relo_idx, insn_idx,
+ (unsigned long long)imm, new_val);
+ break;
+ }
+ default:
+ pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
+ prog_name, relo_idx, insn_idx, insn->code,
+ insn->src_reg, insn->dst_reg, insn->off, insn->imm);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Output spec definition in the format:
+ * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
+ * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
+ */
+static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
+{
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ const char *s;
+ __u32 type_id;
+ int i;
+
+ type_id = spec->root_type_id;
+ t = btf__type_by_id(spec->btf, type_id);
+ s = btf__name_by_offset(spec->btf, t->name_off);
+
+ libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
+
+ if (core_relo_is_type_based(spec->relo_kind))
+ return;
+
+ if (core_relo_is_enumval_based(spec->relo_kind)) {
+ t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
+ e = btf_enum(t) + spec->raw_spec[0];
+ s = btf__name_by_offset(spec->btf, e->name_off);
+
+ libbpf_print(level, "::%s = %u", s, e->val);
+ return;
+ }
+
+ if (core_relo_is_field_based(spec->relo_kind)) {
+ for (i = 0; i < spec->len; i++) {
+ if (spec->spec[i].name)
+ libbpf_print(level, ".%s", spec->spec[i].name);
+ else if (i > 0 || spec->spec[i].idx > 0)
+ libbpf_print(level, "[%u]", spec->spec[i].idx);
+ }
+
+ libbpf_print(level, " (");
+ for (i = 0; i < spec->raw_len; i++)
+ libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
+
+ if (spec->bit_offset % 8)
+ libbpf_print(level, " @ offset %u.%u)",
+ spec->bit_offset / 8, spec->bit_offset % 8);
+ else
+ libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
+ return;
+ }
+}
+
+/*
+ * CO-RE relocate single instruction.
+ *
+ * The outline and important points of the algorithm:
+ * 1. For given local type, find corresponding candidate target types.
+ * Candidate type is a type with the same "essential" name, ignoring
+ * everything after last triple underscore (___). E.g., `sample`,
+ * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
+ * for each other. Names with triple underscore are referred to as
+ * "flavors" and are useful, among other things, to allow to
+ * specify/support incompatible variations of the same kernel struct, which
+ * might differ between different kernel versions and/or build
+ * configurations.
+ *
+ * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
+ * converter, when deduplicated BTF of a kernel still contains more than
+ * one different types with the same name. In that case, ___2, ___3, etc
+ * are appended starting from second name conflict. But start flavors are
+ * also useful to be defined "locally", in BPF program, to extract same
+ * data from incompatible changes between different kernel
+ * versions/configurations. For instance, to handle field renames between
+ * kernel versions, one can use two flavors of the struct name with the
+ * same common name and use conditional relocations to extract that field,
+ * depending on target kernel version.
+ * 2. For each candidate type, try to match local specification to this
+ * candidate target type. Matching involves finding corresponding
+ * high-level spec accessors, meaning that all named fields should match,
+ * as well as all array accesses should be within the actual bounds. Also,
+ * types should be compatible (see bpf_core_fields_are_compat for details).
+ * 3. It is supported and expected that there might be multiple flavors
+ * matching the spec. As long as all the specs resolve to the same set of
+ * offsets across all candidates, there is no error. If there is any
+ * ambiguity, CO-RE relocation will fail. This is necessary to accomodate
+ * imprefection of BTF deduplication, which can cause slight duplication of
+ * the same BTF type, if some directly or indirectly referenced (by
+ * pointer) type gets resolved to different actual types in different
+ * object files. If such situation occurs, deduplicated BTF will end up
+ * with two (or more) structurally identical types, which differ only in
+ * types they refer to through pointer. This should be OK in most cases and
+ * is not an error.
+ * 4. Candidate types search is performed by linearly scanning through all
+ * types in target BTF. It is anticipated that this is overall more
+ * efficient memory-wise and not significantly worse (if not better)
+ * CPU-wise compared to prebuilding a map from all local type names to
+ * a list of candidate type names. It's also sped up by caching resolved
+ * list of matching candidates per each local "root" type ID, that has at
+ * least one bpf_core_relo associated with it. This list is shared
+ * between multiple relocations for the same type ID and is updated as some
+ * of the candidates are pruned due to structural incompatibility.
+ */
+int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
+ int insn_idx,
+ const struct bpf_core_relo *relo,
+ int relo_idx,
+ const struct btf *local_btf,
+ struct bpf_core_cand_list *cands)
+{
+ struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
+ struct bpf_core_relo_res cand_res, targ_res;
+ const struct btf_type *local_type;
+ const char *local_name;
+ __u32 local_id;
+ const char *spec_str;
+ int i, j, err;
+
+ local_id = relo->type_id;
+ local_type = btf__type_by_id(local_btf, local_id);
+ if (!local_type)
+ return -EINVAL;
+
+ local_name = btf__name_by_offset(local_btf, local_type->name_off);
+ if (!local_name)
+ return -EINVAL;
+
+ spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
+ if (str_is_empty(spec_str))
+ return -EINVAL;
+
+ err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
+ if (err) {
+ pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
+ prog_name, relo_idx, local_id, btf_kind_str(local_type),
+ str_is_empty(local_name) ? "<anon>" : local_name,
+ spec_str, err);
+ return -EINVAL;
+ }
+
+ pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,
+ relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+ bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
+ libbpf_print(LIBBPF_DEBUG, "\n");
+
+ /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
+ if (relo->kind == BPF_TYPE_ID_LOCAL) {
+ targ_res.validate = true;
+ targ_res.poison = false;
+ targ_res.orig_val = local_spec.root_type_id;
+ targ_res.new_val = local_spec.root_type_id;
+ goto patch_insn;
+ }
+
+ /* libbpf doesn't support candidate search for anonymous types */
+ if (str_is_empty(spec_str)) {
+ pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
+ prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
+ return -EOPNOTSUPP;
+ }
+
+
+ for (i = 0, j = 0; i < cands->len; i++) {
+ err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
+ cands->cands[i].id, &cand_spec);
+ if (err < 0) {
+ pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
+ prog_name, relo_idx, i);
+ bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
+ libbpf_print(LIBBPF_WARN, ": %d\n", err);
+ return err;
+ }
+
+ pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,
+ relo_idx, err == 0 ? "non-matching" : "matching", i);
+ bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
+ libbpf_print(LIBBPF_DEBUG, "\n");
+
+ if (err == 0)
+ continue;
+
+ err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
+ if (err)
+ return err;
+
+ if (j == 0) {
+ targ_res = cand_res;
+ targ_spec = cand_spec;
+ } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
+ /* if there are many field relo candidates, they
+ * should all resolve to the same bit offset
+ */
+ pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
+ prog_name, relo_idx, cand_spec.bit_offset,
+ targ_spec.bit_offset);
+ return -EINVAL;
+ } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
+ /* all candidates should result in the same relocation
+ * decision and value, otherwise it's dangerous to
+ * proceed due to ambiguity
+ */
+ pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
+ prog_name, relo_idx,
+ cand_res.poison ? "failure" : "success", cand_res.new_val,
+ targ_res.poison ? "failure" : "success", targ_res.new_val);
+ return -EINVAL;
+ }
+
+ cands->cands[j++] = cands->cands[i];
+ }
+
+ /*
+ * For BPF_FIELD_EXISTS relo or when used BPF program has field
+ * existence checks or kernel version/config checks, it's expected
+ * that we might not find any candidates. In this case, if field
+ * wasn't found in any candidate, the list of candidates shouldn't
+ * change at all, we'll just handle relocating appropriately,
+ * depending on relo's kind.
+ */
+ if (j > 0)
+ cands->len = j;
+
+ /*
+ * If no candidates were found, it might be both a programmer error,
+ * as well as expected case, depending whether instruction w/
+ * relocation is guarded in some way that makes it unreachable (dead
+ * code) if relocation can't be resolved. This is handled in
+ * bpf_core_patch_insn() uniformly by replacing that instruction with
+ * BPF helper call insn (using invalid helper ID). If that instruction
+ * is indeed unreachable, then it will be ignored and eliminated by
+ * verifier. If it was an error, then verifier will complain and point
+ * to a specific instruction number in its log.
+ */
+ if (j == 0) {
+ pr_debug("prog '%s': relo #%d: no matching targets found\n",
+ prog_name, relo_idx);
+
+ /* calculate single target relo result explicitly */
+ err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res);
+ if (err)
+ return err;
+ }
+
+patch_insn:
+ /* bpf_core_patch_insn() should know how to handle missing targ_spec */
+ err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res);
+ if (err) {
+ pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
+ prog_name, relo_idx, relo->insn_off / 8, err);
+ return -EINVAL;
+ }
+
+ return 0;
+}
diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h
new file mode 100644
index 000000000000..3b9f8f18346c
--- /dev/null
+++ b/tools/lib/bpf/relo_core.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2019 Facebook */
+
+#ifndef __RELO_CORE_H
+#define __RELO_CORE_H
+
+/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value
+ * has to be adjusted by relocations.
+ */
+enum bpf_core_relo_kind {
+ BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
+ BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */
+ BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
+ BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
+ BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
+ BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
+ BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
+ BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */
+ BPF_TYPE_EXISTS = 8, /* type existence in target kernel */
+ BPF_TYPE_SIZE = 9, /* type size in bytes */
+ BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
+ BPF_ENUMVAL_VALUE = 11, /* enum value integer value */
+};
+
+/* The minimum bpf_core_relo checked by the loader
+ *
+ * CO-RE relocation captures the following data:
+ * - insn_off - instruction offset (in bytes) within a BPF program that needs
+ * its insn->imm field to be relocated with actual field info;
+ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
+ * type or field;
+ * - access_str_off - offset into corresponding .BTF string section. String
+ * interpretation depends on specific relocation kind:
+ * - for field-based relocations, string encodes an accessed field using
+ * a sequence of field and array indices, separated by colon (:). It's
+ * conceptually very close to LLVM's getelementptr ([0]) instruction's
+ * arguments for identifying offset to a field.
+ * - for type-based relocations, strings is expected to be just "0";
+ * - for enum value-based relocations, string contains an index of enum
+ * value within its enum type;
+ *
+ * Example to provide a better feel.
+ *
+ * struct sample {
+ * int a;
+ * struct {
+ * int b[10];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ * int x = &s->a; // encoded as "0:0" (a is field #0)
+ * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1,
+ * // b is field #0 inside anon struct, accessing elem #5)
+ * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
+ *
+ * type_id for all relocs in this example will capture BTF type id of
+ * `struct sample`.
+ *
+ * Such relocation is emitted when using __builtin_preserve_access_index()
+ * Clang built-in, passing expression that captures field address, e.g.:
+ *
+ * bpf_probe_read(&dst, sizeof(dst),
+ * __builtin_preserve_access_index(&src->a.b.c));
+ *
+ * In this case Clang will emit field relocation recording necessary data to
+ * be able to find offset of embedded `a.b.c` field within `src` struct.
+ *
+ * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
+ */
+struct bpf_core_relo {
+ __u32 insn_off;
+ __u32 type_id;
+ __u32 access_str_off;
+ enum bpf_core_relo_kind kind;
+};
+
+struct bpf_core_cand {
+ const struct btf *btf;
+ const struct btf_type *t;
+ const char *name;
+ __u32 id;
+};
+
+/* dynamically sized list of type IDs and its associated struct btf */
+struct bpf_core_cand_list {
+ struct bpf_core_cand *cands;
+ int len;
+};
+
+int bpf_core_apply_relo_insn(const char *prog_name,
+ struct bpf_insn *insn, int insn_idx,
+ const struct bpf_core_relo *relo, int relo_idx,
+ const struct btf *local_btf,
+ struct bpf_core_cand_list *cands);
+int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
+ const struct btf *targ_btf, __u32 targ_id);
+
+size_t bpf_core_essential_name_len(const char *name);
+#endif
diff --git a/tools/memory-model/Documentation/access-marking.txt b/tools/memory-model/Documentation/access-marking.txt
index 1ab189f51f55..65778222183e 100644
--- a/tools/memory-model/Documentation/access-marking.txt
+++ b/tools/memory-model/Documentation/access-marking.txt
@@ -37,7 +37,9 @@ compiler's use of code-motion and common-subexpression optimizations.
Therefore, if a given access is involved in an intentional data race,
using READ_ONCE() for loads and WRITE_ONCE() for stores is usually
preferable to data_race(), which in turn is usually preferable to plain
-C-language accesses.
+C-language accesses. It is permissible to combine #2 and #3, for example,
+data_race(READ_ONCE(a)), which will both restrict compiler optimizations
+and disable KCSAN diagnostics.
KCSAN will complain about many types of data races involving plain
C-language accesses, but marking all accesses involved in a given data
@@ -86,6 +88,10 @@ that fail to exclude the updates. In this case, it is important to use
data_race() for the diagnostic reads because otherwise KCSAN would give
false-positive warnings about these diagnostic reads.
+If it is necessary to both restrict compiler optimizations and disable
+KCSAN diagnostics, use both data_race() and READ_ONCE(), for example,
+data_race(READ_ONCE(a)).
+
In theory, plain C-language loads can also be used for this use case.
However, in practice this will have the disadvantage of causing KCSAN
to generate false positives because KCSAN will have no way of knowing
@@ -126,6 +132,11 @@ consistent errors, which in turn are quite capable of breaking heuristics.
Therefore use of data_race() should be limited to cases where some other
code (such as a barrier() call) will force the occasional reload.
+Note that this use case requires that the heuristic be able to handle
+any possible error. In contrast, if the heuristics might be fatally
+confused by one or more of the possible erroneous values, use READ_ONCE()
+instead of data_race().
+
In theory, plain C-language loads can also be used for this use case.
However, in practice this will have the disadvantage of causing KCSAN
to generate false positives because KCSAN will have no way of knowing
@@ -259,9 +270,9 @@ diagnostic purposes. The code might look as follows:
return ret;
}
- int read_foo_diagnostic(void)
+ void read_foo_diagnostic(void)
{
- return data_race(foo);
+ pr_info("Current value of foo: %d\n", data_race(foo));
}
The reader-writer lock prevents the compiler from introducing concurrency
@@ -274,19 +285,34 @@ tells KCSAN that data races are expected, and should be silently
ignored. This data_race() also tells the human reading the code that
read_foo_diagnostic() might sometimes return a bogus value.
-However, please note that your kernel must be built with
-CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n in order for KCSAN to
-detect a buggy lockless write. If you need KCSAN to detect such a
-write even if that write did not change the value of foo, you also
-need CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n. If you need KCSAN to
-detect such a write happening in an interrupt handler running on the
-same CPU doing the legitimate lock-protected write, you also need
-CONFIG_KCSAN_INTERRUPT_WATCHER=y. With some or all of these Kconfig
-options set properly, KCSAN can be quite helpful, although it is not
-necessarily a full replacement for hardware watchpoints. On the other
-hand, neither are hardware watchpoints a full replacement for KCSAN
-because it is not always easy to tell hardware watchpoint to conditionally
-trap on accesses.
+If it is necessary to suppress compiler optimization and also detect
+buggy lockless writes, read_foo_diagnostic() can be updated as follows:
+
+ void read_foo_diagnostic(void)
+ {
+ pr_info("Current value of foo: %d\n", data_race(READ_ONCE(foo)));
+ }
+
+Alternatively, given that KCSAN is to ignore all accesses in this function,
+this function can be marked __no_kcsan and the data_race() can be dropped:
+
+ void __no_kcsan read_foo_diagnostic(void)
+ {
+ pr_info("Current value of foo: %d\n", READ_ONCE(foo));
+ }
+
+However, in order for KCSAN to detect buggy lockless writes, your kernel
+must be built with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n. If you
+need KCSAN to detect such a write even if that write did not change
+the value of foo, you also need CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n.
+If you need KCSAN to detect such a write happening in an interrupt handler
+running on the same CPU doing the legitimate lock-protected write, you
+also need CONFIG_KCSAN_INTERRUPT_WATCHER=y. With some or all of these
+Kconfig options set properly, KCSAN can be quite helpful, although
+it is not necessarily a full replacement for hardware watchpoints.
+On the other hand, neither are hardware watchpoints a full replacement
+for KCSAN because it is not always easy to tell hardware watchpoint to
+conditionally trap on accesses.
Lock-Protected Writes With Lockless Reads
@@ -319,6 +345,99 @@ of the ASSERT_EXCLUSIVE_WRITER() is to allow KCSAN to check for a buggy
concurrent lockless write.
+Lock-Protected Writes With Heuristic Lockless Reads
+---------------------------------------------------
+
+For another example, suppose that the code can normally make use of
+a per-data-structure lock, but there are times when a global lock
+is required. These times are indicated via a global flag. The code
+might look as follows, and is based loosely on nf_conntrack_lock(),
+nf_conntrack_all_lock(), and nf_conntrack_all_unlock():
+
+ bool global_flag;
+ DEFINE_SPINLOCK(global_lock);
+ struct foo {
+ spinlock_t f_lock;
+ int f_data;
+ };
+
+ /* All foo structures are in the following array. */
+ int nfoo;
+ struct foo *foo_array;
+
+ void do_something_locked(struct foo *fp)
+ {
+ /* This works even if data_race() returns nonsense. */
+ if (!data_race(global_flag)) {
+ spin_lock(&fp->f_lock);
+ if (!smp_load_acquire(&global_flag)) {
+ do_something(fp);
+ spin_unlock(&fp->f_lock);
+ return;
+ }
+ spin_unlock(&fp->f_lock);
+ }
+ spin_lock(&global_lock);
+ /* global_lock held, thus global flag cannot be set. */
+ spin_lock(&fp->f_lock);
+ spin_unlock(&global_lock);
+ /*
+ * global_flag might be set here, but begin_global()
+ * will wait for ->f_lock to be released.
+ */
+ do_something(fp);
+ spin_unlock(&fp->f_lock);
+ }
+
+ void begin_global(void)
+ {
+ int i;
+
+ spin_lock(&global_lock);
+ WRITE_ONCE(global_flag, true);
+ for (i = 0; i < nfoo; i++) {
+ /*
+ * Wait for pre-existing local locks. One at
+ * a time to avoid lockdep limitations.
+ */
+ spin_lock(&fp->f_lock);
+ spin_unlock(&fp->f_lock);
+ }
+ }
+
+ void end_global(void)
+ {
+ smp_store_release(&global_flag, false);
+ spin_unlock(&global_lock);
+ }
+
+All code paths leading from the do_something_locked() function's first
+read from global_flag acquire a lock, so endless load fusing cannot
+happen.
+
+If the value read from global_flag is true, then global_flag is
+rechecked while holding ->f_lock, which, if global_flag is now false,
+prevents begin_global() from completing. It is therefore safe to invoke
+do_something().
+
+Otherwise, if either value read from global_flag is true, then after
+global_lock is acquired global_flag must be false. The acquisition of
+->f_lock will prevent any call to begin_global() from returning, which
+means that it is safe to release global_lock and invoke do_something().
+
+For this to work, only those foo structures in foo_array[] may be passed
+to do_something_locked(). The reason for this is that the synchronization
+with begin_global() relies on momentarily holding the lock of each and
+every foo structure.
+
+The smp_load_acquire() and smp_store_release() are required because
+changes to a foo structure between calls to begin_global() and
+end_global() are carried out without holding that structure's ->f_lock.
+The smp_load_acquire() and smp_store_release() ensure that the next
+invocation of do_something() from do_something_locked() will see those
+changes.
+
+
Lockless Reads and Writes
-------------------------
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index aef2a290e71a..6f3953f2a0d5 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -176,7 +176,7 @@
131 nospu quotactl sys_quotactl
132 common getpgid sys_getpgid
133 common fchdir sys_fchdir
-134 common bdflush sys_bdflush
+134 common bdflush sys_ni_syscall
135 common sysfs sys_sysfs
136 32 personality sys_personality ppc64_personality
136 64 personality ppc64_personality
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 64d51ab5a8b4..8d619ec86dcc 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -122,7 +122,7 @@
131 common quotactl sys_quotactl sys_quotactl
132 common getpgid sys_getpgid sys_getpgid
133 common fchdir sys_fchdir sys_fchdir
-134 common bdflush sys_bdflush sys_bdflush
+134 common bdflush - -
135 common sysfs sys_sysfs sys_sysfs
136 common personality sys_s390_personality sys_s390_personality
137 common afs_syscall - -
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index cdecda1ddd36..996d025b8ed8 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -223,10 +223,10 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
free(info_linear);
return -1;
}
- if (btf__get_from_id(info->btf_id, &btf)) {
+ btf = btf__load_from_kernel_by_id(info->btf_id);
+ if (libbpf_get_error(btf)) {
pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id);
err = -1;
- btf = NULL;
goto out;
}
perf_env__fetch_btf(env, info->btf_id, btf);
@@ -296,7 +296,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
out:
free(info_linear);
- free(btf);
+ btf__free(btf);
return err ? -1 : 0;
}
@@ -478,7 +478,8 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
if (btf_id == 0)
goto out;
- if (btf__get_from_id(btf_id, &btf)) {
+ btf = btf__load_from_kernel_by_id(btf_id);
+ if (libbpf_get_error(btf)) {
pr_debug("%s: failed to get BTF of id %u, aborting\n",
__func__, btf_id);
goto out;
@@ -486,7 +487,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
perf_env__fetch_btf(env, btf_id, btf);
out:
- free(btf);
+ btf__free(btf);
close(fd);
}
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index 8150e03367bb..ba0f20853651 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -64,8 +64,8 @@ static char *bpf_target_prog_name(int tgt_fd)
struct bpf_prog_info_linear *info_linear;
struct bpf_func_info *func_info;
const struct btf_type *t;
+ struct btf *btf = NULL;
char *name = NULL;
- struct btf *btf;
info_linear = bpf_program__get_prog_info_linear(
tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
@@ -74,12 +74,17 @@ static char *bpf_target_prog_name(int tgt_fd)
return NULL;
}
- if (info_linear->info.btf_id == 0 ||
- btf__get_from_id(info_linear->info.btf_id, &btf)) {
+ if (info_linear->info.btf_id == 0) {
pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd);
goto out;
}
+ btf = btf__load_from_kernel_by_id(info_linear->info.btf_id);
+ if (libbpf_get_error(btf)) {
+ pr_debug("failed to load btf for prog FD %d\n", tgt_fd);
+ goto out;
+ }
+
func_info = u64_to_ptr(info_linear->info.func_info);
t = btf__type_by_id(btf, func_info[0].type_id);
if (!t) {
@@ -89,6 +94,7 @@ static char *bpf_target_prog_name(int tgt_fd)
}
name = strdup(btf__name_by_offset(btf, t->name_off));
out:
+ btf__free(btf);
free(info_linear);
return name;
}
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 6276ce0c0196..5a931456e718 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -16,6 +16,7 @@ assert sys.version_info >= (3, 7), "Python version is too old"
from collections import namedtuple
from enum import Enum, auto
+from typing import Iterable
import kunit_config
import kunit_json
@@ -30,12 +31,13 @@ KunitBuildRequest = namedtuple('KunitBuildRequest',
['jobs', 'build_dir', 'alltests',
'make_options'])
KunitExecRequest = namedtuple('KunitExecRequest',
- ['timeout', 'build_dir', 'alltests', 'filter_glob'])
+ ['timeout', 'build_dir', 'alltests',
+ 'filter_glob', 'kernel_args'])
KunitParseRequest = namedtuple('KunitParseRequest',
['raw_output', 'input_data', 'build_dir', 'json'])
KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs',
'build_dir', 'alltests', 'filter_glob',
- 'json', 'make_options'])
+ 'kernel_args', 'json', 'make_options'])
KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0]
@@ -94,6 +96,7 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree,
kunit_parser.print_with_timestamp('Starting KUnit Kernel ...')
test_start = time.time()
result = linux.run_kernel(
+ args=request.kernel_args,
timeout=None if request.alltests else request.timeout,
filter_glob=request.filter_glob,
build_dir=request.build_dir)
@@ -112,7 +115,16 @@ def parse_tests(request: KunitParseRequest) -> KunitResult:
'Tests not Parsed.')
if request.raw_output:
- kunit_parser.raw_output(request.input_data)
+ output: Iterable[str] = request.input_data
+ if request.raw_output == 'all':
+ pass
+ elif request.raw_output == 'kunit':
+ output = kunit_parser.extract_tap_lines(output)
+ else:
+ print(f'Unknown --raw_output option "{request.raw_output}"', file=sys.stderr)
+ for line in output:
+ print(line.rstrip())
+
else:
test_result = kunit_parser.parse_run_tests(request.input_data)
parse_end = time.time()
@@ -133,7 +145,6 @@ def parse_tests(request: KunitParseRequest) -> KunitResult:
return KunitResult(KunitStatus.SUCCESS, test_result,
parse_end - parse_start)
-
def run_tests(linux: kunit_kernel.LinuxSourceTree,
request: KunitRequest) -> KunitResult:
run_start = time.time()
@@ -152,7 +163,8 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree,
return build_result
exec_request = KunitExecRequest(request.timeout, request.build_dir,
- request.alltests, request.filter_glob)
+ request.alltests, request.filter_glob,
+ request.kernel_args)
exec_result = exec_tests(linux, exec_request)
if exec_result.status != KunitStatus.SUCCESS:
return exec_result
@@ -178,7 +190,7 @@ def add_common_opts(parser) -> None:
parser.add_argument('--build_dir',
help='As in the make command, it specifies the build '
'directory.',
- type=str, default='.kunit', metavar='build_dir')
+ type=str, default='.kunit', metavar='build_dir')
parser.add_argument('--make_options',
help='X=Y make option, can be repeated.',
action='append')
@@ -238,10 +250,14 @@ def add_exec_opts(parser) -> None:
nargs='?',
default='',
metavar='filter_glob')
+ parser.add_argument('--kernel_args',
+ help='Kernel command-line parameters. Maybe be repeated',
+ action='append')
def add_parse_opts(parser) -> None:
- parser.add_argument('--raw_output', help='don\'t format output from kernel',
- action='store_true')
+ parser.add_argument('--raw_output', help='If set don\'t format output from kernel. '
+ 'If set to --raw_output=kunit, filters to just KUnit output.',
+ type=str, nargs='?', const='all', default=None)
parser.add_argument('--json',
nargs='?',
help='Stores test results in a JSON, and either '
@@ -309,6 +325,7 @@ def main(argv, linux=None):
cli_args.build_dir,
cli_args.alltests,
cli_args.filter_glob,
+ cli_args.kernel_args,
cli_args.json,
cli_args.make_options)
result = run_tests(linux, request)
@@ -363,7 +380,8 @@ def main(argv, linux=None):
exec_request = KunitExecRequest(cli_args.timeout,
cli_args.build_dir,
cli_args.alltests,
- cli_args.filter_glob)
+ cli_args.filter_glob,
+ cli_args.kernel_args)
exec_result = exec_tests(linux, exec_request)
parse_request = KunitParseRequest(cli_args.raw_output,
exec_result.result,
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index b88db3f51dc5..6310a641b151 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -106,10 +106,6 @@ def extract_tap_lines(kernel_output: Iterable[str]) -> LineStream:
yield line_num, line[prefix_len:]
return LineStream(lines=isolate_kunit_output(kernel_output))
-def raw_output(kernel_output) -> None:
- for line in kernel_output:
- print(line.rstrip())
-
DIVIDER = '=' * 60
RESET = '\033[0;0m'
@@ -137,7 +133,7 @@ def print_log(log) -> None:
for m in log:
print_with_timestamp(m)
-TAP_ENTRIES = re.compile(r'^(TAP|[\s]*ok|[\s]*not ok|[\s]*[0-9]+\.\.[0-9]+|[\s]*#).*$')
+TAP_ENTRIES = re.compile(r'^(TAP|[\s]*ok|[\s]*not ok|[\s]*[0-9]+\.\.[0-9]+|[\s]*# (Subtest:|.*: kunit test case crashed!)).*$')
def consume_non_diagnostic(lines: LineStream) -> None:
while lines and not TAP_ENTRIES.match(lines.peek()):
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 75045aa0f8a1..619c4554cbff 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -356,7 +356,7 @@ class KUnitMainTest(unittest.TestCase):
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0)
self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir='.kunit', filter_glob='', timeout=300)
+ args=None, build_dir='.kunit', filter_glob='', timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_passes_args_pass(self):
@@ -364,7 +364,7 @@ class KUnitMainTest(unittest.TestCase):
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir='.kunit', filter_glob='', timeout=300)
+ args=None, build_dir='.kunit', filter_glob='', timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_exec_passes_args_fail(self):
@@ -399,11 +399,20 @@ class KUnitMainTest(unittest.TestCase):
self.assertNotEqual(call, mock.call(StrContains('Testing complete.')))
self.assertNotEqual(call, mock.call(StrContains(' 0 tests run')))
+ def test_run_raw_output_kunit(self):
+ self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
+ kunit.main(['run', '--raw_output=kunit'], self.linux_source_mock)
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1)
+ for call in self.print_mock.call_args_list:
+ self.assertNotEqual(call, mock.call(StrContains('Testing complete.')))
+ self.assertNotEqual(call, mock.call(StrContains(' 0 tests run')))
+
def test_exec_timeout(self):
timeout = 3453
kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock)
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir='.kunit', filter_glob='', timeout=timeout)
+ args=None, build_dir='.kunit', filter_glob='', timeout=timeout)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_timeout(self):
@@ -411,7 +420,7 @@ class KUnitMainTest(unittest.TestCase):
kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock)
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir='.kunit', filter_glob='', timeout=timeout)
+ args=None, build_dir='.kunit', filter_glob='', timeout=timeout)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_builddir(self):
@@ -419,7 +428,7 @@ class KUnitMainTest(unittest.TestCase):
kunit.main(['run', '--build_dir=.kunit'], self.linux_source_mock)
self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir=build_dir, filter_glob='', timeout=300)
+ args=None, build_dir=build_dir, filter_glob='', timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_config_builddir(self):
@@ -436,7 +445,7 @@ class KUnitMainTest(unittest.TestCase):
build_dir = '.kunit'
kunit.main(['exec', '--build_dir', build_dir], self.linux_source_mock)
self.linux_source_mock.run_kernel.assert_called_once_with(
- build_dir=build_dir, filter_glob='', timeout=300)
+ args=None, build_dir=build_dir, filter_glob='', timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
@mock.patch.object(kunit_kernel, 'LinuxSourceTree')
@@ -461,5 +470,13 @@ class KUnitMainTest(unittest.TestCase):
cross_compile=None,
qemu_config_path=None)
+ def test_run_kernel_args(self):
+ kunit.main(['run', '--kernel_args=a=1', '--kernel_args=b=2'], self.linux_source_mock)
+ self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1)
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ args=['a=1','b=2'], build_dir='.kunit', filter_glob='', timeout=300)
+ self.print_mock.assert_any_call(StrContains('Testing complete.'))
+
+
if __name__ == '__main__':
unittest.main()
diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h
index f9a12005fcea..16ec895bbe5f 100644
--- a/tools/testing/scatterlist/linux/mm.h
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -127,7 +127,6 @@ kmalloc_array(unsigned int n, unsigned int size, unsigned int flags)
#define kmemleak_free(a)
#define PageSlab(p) (0)
-#define flush_kernel_dcache_page(p)
#define MAX_ERRNO 4095
diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index 652254754b4c..08465a701cd5 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -85,32 +85,46 @@ int main(void)
for (i = 0, test = tests; test->expected_segments; test++, i++) {
int left_pages = test->pfn_app ? test->num_pages : 0;
+ struct sg_append_table append = {};
struct page *pages[MAX_PAGES];
- struct sg_table st;
- struct scatterlist *sg;
+ int ret;
set_pages(pages, test->pfn, test->num_pages);
- sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
- test->size, test->max_seg, NULL, left_pages, GFP_KERNEL);
- assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
+ if (test->pfn_app)
+ ret = sg_alloc_append_table_from_pages(
+ &append, pages, test->num_pages, 0, test->size,
+ test->max_seg, left_pages, GFP_KERNEL);
+ else
+ ret = sg_alloc_table_from_pages_segment(
+ &append.sgt, pages, test->num_pages, 0,
+ test->size, test->max_seg, GFP_KERNEL);
+
+ assert(ret == test->alloc_ret);
if (test->alloc_ret)
continue;
if (test->pfn_app) {
set_pages(pages, test->pfn_app, test->num_pages);
- sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
- test->size, test->max_seg, sg, 0, GFP_KERNEL);
+ ret = sg_alloc_append_table_from_pages(
+ &append, pages, test->num_pages, 0, test->size,
+ test->max_seg, 0, GFP_KERNEL);
- assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
+ assert(ret == test->alloc_ret);
}
- VALIDATE(st.nents == test->expected_segments, &st, test);
+ VALIDATE(append.sgt.nents == test->expected_segments,
+ &append.sgt, test);
if (!test->pfn_app)
- VALIDATE(st.orig_nents == test->expected_segments, &st, test);
-
- sg_free_table(&st);
+ VALIDATE(append.sgt.orig_nents ==
+ test->expected_segments,
+ &append.sgt, test);
+
+ if (test->pfn_app)
+ sg_free_append_table(&append);
+ else
+ sg_free_table(&append.sgt);
}
assert(i == (sizeof(tests) / sizeof(tests[0])) - 1);
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index fb010a35d61a..c852eb40c4f7 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -35,9 +35,11 @@ TARGETS += memory-hotplug
TARGETS += mincore
TARGETS += mount
TARGETS += mount_setattr
+TARGETS += move_mount_set_group
TARGETS += mqueue
TARGETS += nci
TARGETS += net
+TARGETS += net/af_unix
TARGETS += net/forwarding
TARGETS += net/mptcp
TARGETS += netfilter
diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
index d66f76d2a650..b67395903b9b 100644
--- a/tools/testing/selftests/arm64/fp/.gitignore
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -1,5 +1,7 @@
fpsimd-test
+rdvl-sve
sve-probe-vls
sve-ptrace
sve-test
+vec-syscfg
vlset
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index a57009d3a0dc..f2abdd6ba12e 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -1,17 +1,22 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -I../../../../../usr/include/
-TEST_GEN_PROGS := sve-ptrace sve-probe-vls
-TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress sve-test sve-stress vlset
+TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg
+TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress \
+ rdvl-sve \
+ sve-test sve-stress \
+ vlset
all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
fpsimd-test: fpsimd-test.o
$(CC) -nostdlib $^ -o $@
+rdvl-sve: rdvl-sve.o rdvl.o
sve-ptrace: sve-ptrace.o sve-ptrace-asm.o
-sve-probe-vls: sve-probe-vls.o
+sve-probe-vls: sve-probe-vls.o rdvl.o
sve-test: sve-test.o
$(CC) -nostdlib $^ -o $@
+vec-syscfg: vec-syscfg.o rdvl.o
vlset: vlset.o
include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/fp/TODO b/tools/testing/selftests/arm64/fp/TODO
new file mode 100644
index 000000000000..b6b7ebfcf362
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/TODO
@@ -0,0 +1,4 @@
+- Test unsupported values in the ABIs.
+- More coverage for ptrace (eg, vector length conversions).
+- Coverage for signals.
+- Test PR_SVE_VL_INHERITY after a double fork.
diff --git a/tools/testing/selftests/arm64/fp/rdvl-sve.c b/tools/testing/selftests/arm64/fp/rdvl-sve.c
new file mode 100644
index 000000000000..7f8a13a18f5d
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl-sve.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+
+#include "rdvl.h"
+
+int main(void)
+{
+ int vl = rdvl_sve();
+
+ printf("%d\n", vl);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/rdvl.S b/tools/testing/selftests/arm64/fp/rdvl.S
new file mode 100644
index 000000000000..c916c1c9defd
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl.S
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+
+.arch_extension sve
+
+.globl rdvl_sve
+rdvl_sve:
+ hint 34 // BTI C
+ rdvl x0, #1
+ ret
diff --git a/tools/testing/selftests/arm64/fp/rdvl.h b/tools/testing/selftests/arm64/fp/rdvl.h
new file mode 100644
index 000000000000..7c9d953fc9e7
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef RDVL_H
+#define RDVL_H
+
+int rdvl_sve(void);
+
+#endif
diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
index 76e138525d55..a24eca7a4ecb 100644
--- a/tools/testing/selftests/arm64/fp/sve-probe-vls.c
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -13,6 +13,7 @@
#include <asm/sigcontext.h>
#include "../../kselftest.h"
+#include "rdvl.h"
int main(int argc, char **argv)
{
@@ -38,6 +39,10 @@ int main(int argc, char **argv)
vl &= PR_SVE_VL_LEN_MASK;
+ if (rdvl_sve() != vl)
+ ksft_exit_fail_msg("PR_SVE_SET_VL reports %d, RDVL %d\n",
+ vl, rdvl_sve());
+
if (!sve_vl_valid(vl))
ksft_exit_fail_msg("VL %d invalid\n", vl);
vq = sve_vq_from_vl(vl);
diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
new file mode 100644
index 000000000000..c02071dcb563
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -0,0 +1,593 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ * Original author: Mark Brown <broonie@kernel.org>
+ */
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/hwcap.h>
+
+#include "../../kselftest.h"
+#include "rdvl.h"
+
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+
+#define ARCH_MIN_VL SVE_VL_MIN
+
+struct vec_data {
+ const char *name;
+ unsigned long hwcap_type;
+ unsigned long hwcap;
+ const char *rdvl_binary;
+ int (*rdvl)(void);
+
+ int prctl_get;
+ int prctl_set;
+ const char *default_vl_file;
+
+ int default_vl;
+ int min_vl;
+ int max_vl;
+};
+
+
+static struct vec_data vec_data[] = {
+ {
+ .name = "SVE",
+ .hwcap_type = AT_HWCAP,
+ .hwcap = HWCAP_SVE,
+ .rdvl = rdvl_sve,
+ .rdvl_binary = "./rdvl-sve",
+ .prctl_get = PR_SVE_GET_VL,
+ .prctl_set = PR_SVE_SET_VL,
+ .default_vl_file = "/proc/sys/abi/sve_default_vector_length",
+ },
+};
+
+static int stdio_read_integer(FILE *f, const char *what, int *val)
+{
+ int n = 0;
+ int ret;
+
+ ret = fscanf(f, "%d%*1[\n]%n", val, &n);
+ if (ret < 1 || n < 1) {
+ ksft_print_msg("failed to parse integer from %s\n", what);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Start a new process and return the vector length it sees */
+static int get_child_rdvl(struct vec_data *data)
+{
+ FILE *out;
+ int pipefd[2];
+ pid_t pid, child;
+ int read_vl, ret;
+
+ ret = pipe(pipefd);
+ if (ret == -1) {
+ ksft_print_msg("pipe() failed: %d (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ fflush(stdout);
+
+ child = fork();
+ if (child == -1) {
+ ksft_print_msg("fork() failed: %d (%s)\n",
+ errno, strerror(errno));
+ close(pipefd[0]);
+ close(pipefd[1]);
+ return -1;
+ }
+
+ /* Child: put vector length on the pipe */
+ if (child == 0) {
+ /*
+ * Replace stdout with the pipe, errors to stderr from
+ * here as kselftest prints to stdout.
+ */
+ ret = dup2(pipefd[1], 1);
+ if (ret == -1) {
+ fprintf(stderr, "dup2() %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ /* exec() a new binary which puts the VL on stdout */
+ ret = execl(data->rdvl_binary, data->rdvl_binary, NULL);
+ fprintf(stderr, "execl(%s) failed: %d\n",
+ data->rdvl_binary, errno, strerror(errno));
+
+ exit(EXIT_FAILURE);
+ }
+
+ close(pipefd[1]);
+
+ /* Parent; wait for the exit status from the child & verify it */
+ do {
+ pid = wait(&ret);
+ if (pid == -1) {
+ ksft_print_msg("wait() failed: %d (%s)\n",
+ errno, strerror(errno));
+ close(pipefd[0]);
+ return -1;
+ }
+ } while (pid != child);
+
+ assert(pid == child);
+
+ if (!WIFEXITED(ret)) {
+ ksft_print_msg("child exited abnormally\n");
+ close(pipefd[0]);
+ return -1;
+ }
+
+ if (WEXITSTATUS(ret) != 0) {
+ ksft_print_msg("child returned error %d\n",
+ WEXITSTATUS(ret));
+ close(pipefd[0]);
+ return -1;
+ }
+
+ out = fdopen(pipefd[0], "r");
+ if (!out) {
+ ksft_print_msg("failed to open child stdout\n");
+ close(pipefd[0]);
+ return -1;
+ }
+
+ ret = stdio_read_integer(out, "child", &read_vl);
+ fclose(out);
+ if (ret != 0)
+ return ret;
+
+ return read_vl;
+}
+
+static int file_read_integer(const char *name, int *val)
+{
+ FILE *f;
+ int ret;
+
+ f = fopen(name, "r");
+ if (!f) {
+ ksft_test_result_fail("Unable to open %s: %d (%s)\n",
+ name, errno,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = stdio_read_integer(f, name, val);
+ fclose(f);
+
+ return ret;
+}
+
+static int file_write_integer(const char *name, int val)
+{
+ FILE *f;
+ int ret;
+
+ f = fopen(name, "w");
+ if (!f) {
+ ksft_test_result_fail("Unable to open %s: %d (%s)\n",
+ name, errno,
+ strerror(errno));
+ return -1;
+ }
+
+ fprintf(f, "%d", val);
+ fclose(f);
+ if (ret < 0) {
+ ksft_test_result_fail("Error writing %d to %s\n",
+ val, name);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Verify that we can read the default VL via proc, checking that it
+ * is set in a freshly spawned child.
+ */
+static void proc_read_default(struct vec_data *data)
+{
+ int default_vl, child_vl, ret;
+
+ ret = file_read_integer(data->default_vl_file, &default_vl);
+ if (ret != 0)
+ return;
+
+ /* Is this the actual default seen by new processes? */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != default_vl) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ default_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s default vector length %d\n", data->name,
+ default_vl);
+ data->default_vl = default_vl;
+}
+
+/* Verify that we can write a minimum value and have it take effect */
+static void proc_write_min(struct vec_data *data)
+{
+ int ret, new_default, child_vl;
+
+ if (geteuid() != 0) {
+ ksft_test_result_skip("Need to be root to write to /proc\n");
+ return;
+ }
+
+ ret = file_write_integer(data->default_vl_file, ARCH_MIN_VL);
+ if (ret != 0)
+ return;
+
+ /* What was the new value? */
+ ret = file_read_integer(data->default_vl_file, &new_default);
+ if (ret != 0)
+ return;
+
+ /* Did it take effect in a new process? */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != new_default) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ new_default, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s minimum vector length %d\n", data->name,
+ new_default);
+ data->min_vl = new_default;
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* Verify that we can write a maximum value and have it take effect */
+static void proc_write_max(struct vec_data *data)
+{
+ int ret, new_default, child_vl;
+
+ if (geteuid() != 0) {
+ ksft_test_result_skip("Need to be root to write to /proc\n");
+ return;
+ }
+
+ /* -1 is accepted by the /proc interface as the maximum VL */
+ ret = file_write_integer(data->default_vl_file, -1);
+ if (ret != 0)
+ return;
+
+ /* What was the new value? */
+ ret = file_read_integer(data->default_vl_file, &new_default);
+ if (ret != 0)
+ return;
+
+ /* Did it take effect in a new process? */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != new_default) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ new_default, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s maximum vector length %d\n", data->name,
+ new_default);
+ data->max_vl = new_default;
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* Can we read back a VL from prctl? */
+static void prctl_get(struct vec_data *data)
+{
+ int ret;
+
+ ret = prctl(data->prctl_get);
+ if (ret == -1) {
+ ksft_test_result_fail("%s prctl() read failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+
+ /* Mask out any flags */
+ ret &= PR_SVE_VL_LEN_MASK;
+
+ /* Is that what we can read back directly? */
+ if (ret == data->rdvl())
+ ksft_test_result_pass("%s current VL is %d\n",
+ data->name, ret);
+ else
+ ksft_test_result_fail("%s prctl() VL %d but RDVL is %d\n",
+ data->name, ret, data->rdvl());
+}
+
+/* Does the prctl let us set the VL we already have? */
+static void prctl_set_same(struct vec_data *data)
+{
+ int cur_vl = data->rdvl();
+ int ret;
+
+ ret = prctl(data->prctl_set, cur_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+
+ if (cur_vl != data->rdvl())
+ ksft_test_result_pass("%s current VL is %d\n",
+ data->name, ret);
+ else
+ ksft_test_result_fail("%s prctl() VL %d but RDVL is %d\n",
+ data->name, ret, data->rdvl());
+}
+
+/* Can we set a new VL for this process? */
+static void prctl_set(struct vec_data *data)
+{
+ int ret;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ /* Try to set the minimum VL */
+ ret = prctl(data->prctl_set, data->min_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ if ((ret & PR_SVE_VL_LEN_MASK) != data->min_vl) {
+ ksft_test_result_fail("%s prctl set %d but return value is %d\n",
+ data->name, data->min_vl, data->rdvl());
+ return;
+ }
+
+ if (data->rdvl() != data->min_vl) {
+ ksft_test_result_fail("%s set %d but RDVL is %d\n",
+ data->name, data->min_vl, data->rdvl());
+ return;
+ }
+
+ /* Try to set the maximum VL */
+ ret = prctl(data->prctl_set, data->max_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->max_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ if ((ret & PR_SVE_VL_LEN_MASK) != data->max_vl) {
+ ksft_test_result_fail("%s prctl() set %d but return value is %d\n",
+ data->name, data->max_vl, data->rdvl());
+ return;
+ }
+
+ /* The _INHERIT flag should not be present when we read the VL */
+ ret = prctl(data->prctl_get);
+ if (ret == -1) {
+ ksft_test_result_fail("%s prctl() read failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+
+ if (ret & PR_SVE_VL_INHERIT) {
+ ksft_test_result_fail("%s prctl() reports _INHERIT\n",
+ data->name);
+ return;
+ }
+
+ ksft_test_result_pass("%s prctl() set min/max\n", data->name);
+}
+
+/* If we didn't request it a new VL shouldn't affect the child */
+static void prctl_set_no_child(struct vec_data *data)
+{
+ int ret, child_vl;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ ret = prctl(data->prctl_set, data->min_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* Ensure the default VL is different */
+ ret = file_write_integer(data->default_vl_file, data->max_vl);
+ if (ret != 0)
+ return;
+
+ /* Check that the child has the default we just set */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != data->max_vl) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ data->max_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s vector length used default\n", data->name);
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* If we didn't request it a new VL shouldn't affect the child */
+static void prctl_set_for_child(struct vec_data *data)
+{
+ int ret, child_vl;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ ret = prctl(data->prctl_set, data->min_vl | PR_SVE_VL_INHERIT);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* The _INHERIT flag should be present when we read the VL */
+ ret = prctl(data->prctl_get);
+ if (ret == -1) {
+ ksft_test_result_fail("%s prctl() read failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+ if (!(ret & PR_SVE_VL_INHERIT)) {
+ ksft_test_result_fail("%s prctl() does not report _INHERIT\n",
+ data->name);
+ return;
+ }
+
+ /* Ensure the default VL is different */
+ ret = file_write_integer(data->default_vl_file, data->max_vl);
+ if (ret != 0)
+ return;
+
+ /* Check that the child inherited our VL */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != data->min_vl) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ data->min_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s vector length was inherited\n", data->name);
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* _ONEXEC takes effect only in the child process */
+static void prctl_set_onexec(struct vec_data *data)
+{
+ int ret, child_vl;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ /* Set a known value for the default and our current VL */
+ ret = file_write_integer(data->default_vl_file, data->max_vl);
+ if (ret != 0)
+ return;
+
+ ret = prctl(data->prctl_set, data->max_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* Set a different value for the child to have on exec */
+ ret = prctl(data->prctl_set, data->min_vl | PR_SVE_SET_VL_ONEXEC);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* Our current VL should stay the same */
+ if (data->rdvl() != data->max_vl) {
+ ksft_test_result_fail("%s VL changed by _ONEXEC prctl()\n",
+ data->name);
+ return;
+ }
+
+ /* Check that the child inherited our VL */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != data->min_vl) {
+ ksft_test_result_fail("Set %d _ONEXEC but child VL is %d\n",
+ data->min_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s vector length set on exec\n", data->name);
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+typedef void (*test_type)(struct vec_data *);
+
+static const test_type tests[] = {
+ /*
+ * The default/min/max tests must be first and in this order
+ * to provide data for other tests.
+ */
+ proc_read_default,
+ proc_write_min,
+ proc_write_max,
+
+ prctl_get,
+ prctl_set,
+ prctl_set_no_child,
+ prctl_set_for_child,
+ prctl_set_onexec,
+};
+
+int main(void)
+{
+ int i, j;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests) * ARRAY_SIZE(vec_data));
+
+ for (i = 0; i < ARRAY_SIZE(vec_data); i++) {
+ struct vec_data *data = &vec_data[i];
+ unsigned long supported;
+
+ supported = getauxval(data->hwcap_type) & data->hwcap;
+
+ for (j = 0; j < ARRAY_SIZE(tests); j++) {
+ if (supported)
+ tests[j](data);
+ else
+ ksft_test_result_skip("%s not supported\n",
+ data->name);
+ }
+ }
+
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
index bc3ac63f3314..d1fe4ddf1669 100644
--- a/tools/testing/selftests/arm64/mte/.gitignore
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -1,4 +1,5 @@
check_buffer_fill
+check_gcr_el1_cswitch
check_tags_inclusion
check_child_memory
check_mmap_options
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
index f50ac31920d1..0328a1e08f65 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.c
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -298,7 +298,7 @@ int mte_default_setup(void)
int ret;
if (!(hwcaps2 & HWCAP2_MTE)) {
- ksft_print_msg("FAIL: MTE features unavailable\n");
+ ksft_print_msg("SKIP: MTE features unavailable\n");
return KSFT_SKIP;
}
/* Get current mte mode */
diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
index 592fe538506e..b743daa772f5 100644
--- a/tools/testing/selftests/arm64/pauth/pac.c
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -25,13 +25,15 @@
do { \
unsigned long hwcaps = getauxval(AT_HWCAP); \
/* data key instructions are not in NOP space. This prevents a SIGILL */ \
- ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \
+ if (!(hwcaps & HWCAP_PACA)) \
+ SKIP(return, "PAUTH not enabled"); \
} while (0)
#define ASSERT_GENERIC_PAUTH_ENABLED() \
do { \
unsigned long hwcaps = getauxval(AT_HWCAP); \
/* generic key instructions are not in NOP space. This prevents a SIGILL */ \
- ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \
+ if (!(hwcaps & HWCAP_PACG)) \
+ SKIP(return, "Generic PAUTH not enabled"); \
} while (0)
void sign_specific(struct signatures *sign, size_t val)
@@ -256,7 +258,7 @@ TEST(single_thread_different_keys)
unsigned long hwcaps = getauxval(AT_HWCAP);
/* generic and data key instructions are not in NOP space. This prevents a SIGILL */
- ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+ ASSERT_PAUTH_ENABLED();
if (!(hwcaps & HWCAP_PACG)) {
TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
nkeys = NKEYS - 1;
@@ -299,7 +301,7 @@ TEST(exec_changed_keys)
unsigned long hwcaps = getauxval(AT_HWCAP);
/* generic and data key instructions are not in NOP space. This prevents a SIGILL */
- ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+ ASSERT_PAUTH_ENABLED();
if (!(hwcaps & HWCAP_PACG)) {
TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
nkeys = NKEYS - 1;
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
index 78c902045ca7..c1742755abb9 100644
--- a/tools/testing/selftests/arm64/signal/.gitignore
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
mangle_*
fake_sigreturn_*
+sve_*
!*.[ch]
diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
index f96baf1cef1a..ebe8694dbef0 100644
--- a/tools/testing/selftests/arm64/signal/test_signals.h
+++ b/tools/testing/selftests/arm64/signal/test_signals.h
@@ -33,10 +33,12 @@
*/
enum {
FSSBS_BIT,
+ FSVE_BIT,
FMAX_END
};
#define FEAT_SSBS (1UL << FSSBS_BIT)
+#define FEAT_SVE (1UL << FSVE_BIT)
/*
* A descriptor used to describe and configure a test case.
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 2de6e5ed5e25..6836510a522f 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -26,6 +26,7 @@ static int sig_copyctx = SIGTRAP;
static char const *const feats_names[FMAX_END] = {
" SSBS ",
+ " SVE ",
};
#define MAX_FEATS_SZ 128
@@ -263,6 +264,8 @@ int test_init(struct tdescr *td)
*/
if (getauxval(AT_HWCAP) & HWCAP_SSBS)
td->feats_supported |= FEAT_SSBS;
+ if (getauxval(AT_HWCAP) & HWCAP_SVE)
+ td->feats_supported |= FEAT_SVE;
if (feats_ok(td))
fprintf(stderr,
"Required Features: [%s] supported\n",
diff --git a/tools/testing/selftests/arm64/signal/testcases/TODO b/tools/testing/selftests/arm64/signal/testcases/TODO
new file mode 100644
index 000000000000..110ff9fd195d
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/TODO
@@ -0,0 +1,2 @@
+- Validate that register contents are saved and restored as expected.
+- Support and validate extra_context.
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
new file mode 100644
index 000000000000..bb50b5adbf10
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Attempt to change the SVE vector length in a signal hander, this is not
+ * supported and is expected to segfault.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sve_get_vls(struct tdescr *td)
+{
+ int vq, vl;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ return false;
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ /* We need at least two VLs */
+ if (nvls < 2) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ return false;
+ }
+
+ return true;
+}
+
+static int fake_sigreturn_sve_change_vl(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct sve_context *sve;
+
+ /* Get a signal context with a SVE frame in it */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+
+ if (head->size != sizeof(struct sve_context)) {
+ fprintf(stderr, "SVE register state active, skipping\n");
+ return 1;
+ }
+
+ sve = (struct sve_context *)head;
+
+ /* No changes are supported; init left us at minimum VL so go to max */
+ fprintf(stderr, "Attempting to change VL from %d to %d\n",
+ sve->vl, vls[0]);
+ sve->vl = vls[0];
+
+ fake_sigreturn(&sf, sizeof(sf), 0);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_SVE_CHANGE",
+ .descr = "Attempt to change SVE VL",
+ .feats_required = FEAT_SVE,
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .init = sve_get_vls,
+ .run = fake_sigreturn_sve_change_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
new file mode 100644
index 000000000000..4b2418aa08a9
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the SVE register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sve_get_vls(struct tdescr *td)
+{
+ int vq, vl;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ return false;
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ /* We need at least one VL */
+ if (nvls < 1) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ return false;
+ }
+
+ return true;
+}
+
+static void setup_sve_regs(void)
+{
+ /* RDVL x16, #1 so we should have SVE regs; real data is TODO */
+ asm volatile(".inst 0x04bf5030" : : : "x16" );
+}
+
+static int do_one_sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+ unsigned int vl)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct sve_context *sve;
+
+ fprintf(stderr, "Testing VL %d\n", vl);
+
+ if (prctl(PR_SVE_SET_VL, vl) == -1) {
+ fprintf(stderr, "Failed to set VL\n");
+ return 1;
+ }
+
+ /*
+ * Get a signal context which should have a SVE frame and registers
+ * in it.
+ */
+ setup_sve_regs();
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+
+ sve = (struct sve_context *)head;
+ if (sve->vl != vl) {
+ fprintf(stderr, "Got VL %d, expected %d\n", sve->vl, vl);
+ return 1;
+ }
+
+ /* The actual size validation is done in get_current_context() */
+ fprintf(stderr, "Got expected size %u and VL %d\n",
+ head->size, sve->vl);
+
+ return 0;
+}
+
+static int sve_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ int i;
+
+ for (i = 0; i < nvls; i++) {
+ /*
+ * TODO: the signal test helpers can't currently cope
+ * with signal frames bigger than struct sigcontext,
+ * skip VLs that will trigger that.
+ */
+ if (vls[i] > 64)
+ continue;
+
+ if (do_one_sve_vl(td, si, uc, vls[i]))
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "SVE registers",
+ .descr = "Check that we get the right SVE registers reported",
+ .feats_required = FEAT_SVE,
+ .timeout = 3,
+ .init = sve_get_vls,
+ .run = sve_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_vl.c b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c
new file mode 100644
index 000000000000..92904653add1
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Check that the SVE vector length reported in signal contexts is the
+ * expected one.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+unsigned int vl;
+
+static bool get_sve_vl(struct tdescr *td)
+{
+ int ret = prctl(PR_SVE_GET_VL);
+ if (ret == -1)
+ return false;
+
+ vl = ret;
+
+ return true;
+}
+
+static int sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct sve_context *sve;
+
+ /* Get a signal context which should have a SVE frame in it */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+ sve = (struct sve_context *)head;
+
+ if (sve->vl != vl) {
+ fprintf(stderr, "sigframe VL %u, expected %u\n",
+ sve->vl, vl);
+ return 1;
+ } else {
+ fprintf(stderr, "got expected VL %u\n", vl);
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "SVE VL",
+ .descr = "Check that we get the right SVE VL reported",
+ .feats_required = FEAT_SVE,
+ .timeout = 3,
+ .init = get_sve_vl,
+ .run = sve_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
index 61ebcdf63831..8c2a57fc2f9c 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.c
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
@@ -50,12 +50,38 @@ bool validate_extra_context(struct extra_context *extra, char **err)
return true;
}
+bool validate_sve_context(struct sve_context *sve, char **err)
+{
+ /* Size will be rounded up to a multiple of 16 bytes */
+ size_t regs_size
+ = ((SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl)) + 15) / 16) * 16;
+
+ if (!sve || !err)
+ return false;
+
+ /* Either a bare sve_context or a sve_context followed by regs data */
+ if ((sve->head.size != sizeof(struct sve_context)) &&
+ (sve->head.size != regs_size)) {
+ *err = "bad size for SVE context";
+ return false;
+ }
+
+ if (!sve_vl_valid(sve->vl)) {
+ *err = "SVE VL invalid";
+
+ return false;
+ }
+
+ return true;
+}
+
bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
{
bool terminated = false;
size_t offs = 0;
int flags = 0;
struct extra_context *extra = NULL;
+ struct sve_context *sve = NULL;
struct _aarch64_ctx *head =
(struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
@@ -90,9 +116,8 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
case SVE_MAGIC:
if (flags & SVE_CTX)
*err = "Multiple SVE_MAGIC";
- else if (head->size !=
- sizeof(struct sve_context))
- *err = "Bad size for sve_context";
+ /* Size is validated in validate_sve_context() */
+ sve = (struct sve_context *)head;
flags |= SVE_CTX;
break;
case EXTRA_MAGIC:
@@ -137,6 +162,9 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
if (flags & EXTRA_CTX)
if (!validate_extra_context(extra, err))
return false;
+ if (flags & SVE_CTX)
+ if (!validate_sve_context(sve, err))
+ return false;
head = GET_RESV_NEXT_HEAD(head);
}
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index addcfd8b615e..433f8bef261e 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -23,7 +23,6 @@ test_skb_cgroup_id_user
test_cgroup_storage
test_flow_dissector
flow_dissector_load
-test_netcnt
test_tcpnotify_user
test_libbpf
test_tcp_check_syncookie_user
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f405b20c1e6c..866531c08e4f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -38,7 +38,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_verifier_log test_dev_cgroup \
test_sock test_sockmap get_cgroup_id_user \
test_cgroup_storage \
- test_netcnt test_tcpnotify_user test_sysctl \
+ test_tcpnotify_user test_sysctl \
test_progs-no_alu32
# Also test bpf-gcc, if present
@@ -79,7 +79,7 @@ TEST_PROGS := test_kmod.sh \
TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \
- test_xdp_vlan.sh
+ test_xdp_vlan.sh test_bpftool.py
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
@@ -187,6 +187,8 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \
cp $(SCRATCH_DIR)/runqslower $@
+TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL)
+
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
@@ -197,7 +199,6 @@ $(OUTPUT)/test_sockmap: cgroup_helpers.c
$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
-$(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 8deec1ca9150..9b17f2867488 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -19,6 +19,13 @@ the CI. It builds the kernel (without overwriting your existing Kconfig), recomp
bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and
saves the resulting output (by default in ``~/.bpf_selftests``).
+Script dependencies:
+- clang (preferably built from sources, https://github.com/llvm/llvm-project);
+- pahole (preferably built from sources, https://git.kernel.org/pub/scm/devel/pahole/pahole.git/);
+- qemu;
+- docutils (for ``rst2man``);
+- libcap-devel.
+
For more information on about using the script, run:
.. code-block:: console
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 029589c008c9..b1ede6f0b821 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -12,6 +12,10 @@
SEC("struct_ops/"#name) \
BPF_PROG(name, args)
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
#define tcp_jiffies32 ((__u32)bpf_jiffies64())
struct sock_common {
@@ -27,6 +31,7 @@ enum sk_pacing {
struct sock {
struct sock_common __sk_common;
+#define sk_state __sk_common.skc_state
unsigned long sk_pacing_rate;
__u32 sk_pacing_status; /* see enum sk_pacing */
} __attribute__((preserve_access_index));
@@ -203,6 +208,20 @@ static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
}
+static __always_inline bool tcp_cc_eq(const char *a, const char *b)
+{
+ int i;
+
+ for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+ if (a[i] != b[i])
+ return false;
+ if (!a[i])
+ break;
+ }
+
+ return true;
+}
+
extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
diff --git a/tools/testing/selftests/bpf/netcnt_common.h b/tools/testing/selftests/bpf/netcnt_common.h
index 81084c1c2c23..0ab1c88041cd 100644
--- a/tools/testing/selftests/bpf/netcnt_common.h
+++ b/tools/testing/selftests/bpf/netcnt_common.h
@@ -6,19 +6,39 @@
#define MAX_PERCPU_PACKETS 32
-struct percpu_net_cnt {
- __u64 packets;
- __u64 bytes;
+/* sizeof(struct bpf_local_storage_elem):
+ *
+ * It really is about 128 bytes on x86_64, but allocate more to account for
+ * possible layout changes, different architectures, etc.
+ * The kernel will wrap up to PAGE_SIZE internally anyway.
+ */
+#define SIZEOF_BPF_LOCAL_STORAGE_ELEM 256
- __u64 prev_ts;
+/* Try to estimate kernel's BPF_LOCAL_STORAGE_MAX_VALUE_SIZE: */
+#define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE (0xFFFF - \
+ SIZEOF_BPF_LOCAL_STORAGE_ELEM)
- __u64 prev_packets;
- __u64 prev_bytes;
+#define PCPU_MIN_UNIT_SIZE 32768
+
+union percpu_net_cnt {
+ struct {
+ __u64 packets;
+ __u64 bytes;
+
+ __u64 prev_ts;
+
+ __u64 prev_packets;
+ __u64 prev_bytes;
+ };
+ __u8 data[PCPU_MIN_UNIT_SIZE];
};
-struct net_cnt {
- __u64 packets;
- __u64 bytes;
+union net_cnt {
+ struct {
+ __u64 packets;
+ __u64 bytes;
+ };
+ __u8 data[BPF_LOCAL_STORAGE_MAX_VALUE_SIZE];
};
#endif
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 2060bc122c53..7e9f6375757a 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -66,17 +66,13 @@ int settimeo(int fd, int timeout_ms)
#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
-int start_server(int family, int type, const char *addr_str, __u16 port,
- int timeout_ms)
+static int __start_server(int type, const struct sockaddr *addr,
+ socklen_t addrlen, int timeout_ms, bool reuseport)
{
- struct sockaddr_storage addr = {};
- socklen_t len;
+ int on = 1;
int fd;
- if (make_sockaddr(family, addr_str, port, &addr, &len))
- return -1;
-
- fd = socket(family, type, 0);
+ fd = socket(addr->sa_family, type, 0);
if (fd < 0) {
log_err("Failed to create server socket");
return -1;
@@ -85,7 +81,13 @@ int start_server(int family, int type, const char *addr_str, __u16 port,
if (settimeo(fd, timeout_ms))
goto error_close;
- if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
+ if (reuseport &&
+ setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) {
+ log_err("Failed to set SO_REUSEPORT");
+ return -1;
+ }
+
+ if (bind(fd, addr, addrlen) < 0) {
log_err("Failed to bind socket");
goto error_close;
}
@@ -104,6 +106,69 @@ error_close:
return -1;
}
+int start_server(int family, int type, const char *addr_str, __u16 port,
+ int timeout_ms)
+{
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ return -1;
+
+ return __start_server(type, (struct sockaddr *)&addr,
+ addrlen, timeout_ms, false);
+}
+
+int *start_reuseport_server(int family, int type, const char *addr_str,
+ __u16 port, int timeout_ms, unsigned int nr_listens)
+{
+ struct sockaddr_storage addr;
+ unsigned int nr_fds = 0;
+ socklen_t addrlen;
+ int *fds;
+
+ if (!nr_listens)
+ return NULL;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ return NULL;
+
+ fds = malloc(sizeof(*fds) * nr_listens);
+ if (!fds)
+ return NULL;
+
+ fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen,
+ timeout_ms, true);
+ if (fds[0] == -1)
+ goto close_fds;
+ nr_fds = 1;
+
+ if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen))
+ goto close_fds;
+
+ for (; nr_fds < nr_listens; nr_fds++) {
+ fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr,
+ addrlen, timeout_ms, true);
+ if (fds[nr_fds] == -1)
+ goto close_fds;
+ }
+
+ return fds;
+
+close_fds:
+ free_fds(fds, nr_fds);
+ return NULL;
+}
+
+void free_fds(int *fds, unsigned int nr_close_fds)
+{
+ if (fds) {
+ while (nr_close_fds)
+ close(fds[--nr_close_fds]);
+ free(fds);
+ }
+}
+
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int timeout_ms)
{
@@ -153,13 +218,18 @@ static int connect_fd_to_addr(int fd,
return 0;
}
-int connect_to_fd(int server_fd, int timeout_ms)
+static const struct network_helper_opts default_opts;
+
+int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
{
struct sockaddr_storage addr;
struct sockaddr_in *addr_in;
socklen_t addrlen, optlen;
int fd, type;
+ if (!opts)
+ opts = &default_opts;
+
optlen = sizeof(type);
if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
log_err("getsockopt(SOL_TYPE)");
@@ -179,7 +249,12 @@ int connect_to_fd(int server_fd, int timeout_ms)
return -1;
}
- if (settimeo(fd, timeout_ms))
+ if (settimeo(fd, opts->timeout_ms))
+ goto error_close;
+
+ if (opts->cc && opts->cc[0] &&
+ setsockopt(fd, SOL_TCP, TCP_CONGESTION, opts->cc,
+ strlen(opts->cc) + 1))
goto error_close;
if (connect_fd_to_addr(fd, &addr, addrlen))
@@ -192,6 +267,15 @@ error_close:
return -1;
}
+int connect_to_fd(int server_fd, int timeout_ms)
+{
+ struct network_helper_opts opts = {
+ .timeout_ms = timeout_ms,
+ };
+
+ return connect_to_fd_opts(server_fd, &opts);
+}
+
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
{
struct sockaddr_storage addr;
@@ -217,6 +301,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
if (family == AF_INET) {
struct sockaddr_in *sin = (void *)addr;
+ memset(addr, 0, sizeof(*sin));
sin->sin_family = AF_INET;
sin->sin_port = htons(port);
if (addr_str &&
@@ -230,6 +315,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
} else if (family == AF_INET6) {
struct sockaddr_in6 *sin6 = (void *)addr;
+ memset(addr, 0, sizeof(*sin6));
sin6->sin6_family = AF_INET6;
sin6->sin6_port = htons(port);
if (addr_str &&
@@ -243,3 +329,15 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
}
return -1;
}
+
+char *ping_command(int family)
+{
+ if (family == AF_INET6) {
+ /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */
+ if (!system("which ping6 >/dev/null 2>&1"))
+ return "ping6";
+ else
+ return "ping -6";
+ }
+ return "ping";
+}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 5e0d51c07b63..da7e132657d5 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -17,6 +17,11 @@ typedef __u16 __sum16;
#define VIP_NUM 5
#define MAGIC_BYTES 123
+struct network_helper_opts {
+ const char *cc;
+ int timeout_ms;
+};
+
/* ipv4 test vector */
struct ipv4_packet {
struct ethhdr eth;
@@ -36,11 +41,17 @@ extern struct ipv6_packet pkt_v6;
int settimeo(int fd, int timeout_ms);
int start_server(int family, int type, const char *addr, __u16 port,
int timeout_ms);
+int *start_reuseport_server(int family, int type, const char *addr_str,
+ __u16 port, int timeout_ms,
+ unsigned int nr_listens);
+void free_fds(int *fds, unsigned int nr_close_fds);
int connect_to_fd(int server_fd, int timeout_ms);
+int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts);
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int timeout_ms);
int make_sockaddr(int family, const char *addr_str, __u16 port,
struct sockaddr_storage *addr, socklen_t *len);
+char *ping_command(int family);
#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index ec11e20d2b92..bf307bb9e446 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -2,79 +2,28 @@
#include <test_progs.h>
#include "test_attach_probe.skel.h"
-#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
-
-#define OP_RT_RA_MASK 0xffff0000UL
-#define LIS_R2 0x3c400000UL
-#define ADDIS_R2_R12 0x3c4c0000UL
-#define ADDI_R2_R2 0x38420000UL
-
-static ssize_t get_offset(ssize_t addr, ssize_t base)
-{
- u32 *insn = (u32 *) addr;
-
- /*
- * A PPC64 ABIv2 function may have a local and a global entry
- * point. We need to use the local entry point when patching
- * functions, so identify and step over the global entry point
- * sequence.
- *
- * The global entry point sequence is always of the form:
- *
- * addis r2,r12,XXXX
- * addi r2,r2,XXXX
- *
- * A linker optimisation may convert the addis to lis:
- *
- * lis r2,XXXX
- * addi r2,r2,XXXX
- */
- if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
- ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
- ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
- return (ssize_t)(insn + 2) - base;
- else
- return addr - base;
-}
-#else
-#define get_offset(addr, base) (addr - base)
-#endif
-
-ssize_t get_base_addr() {
- size_t start, offset;
- char buf[256];
- FILE *f;
-
- f = fopen("/proc/self/maps", "r");
- if (!f)
- return -errno;
-
- while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
- &start, buf, &offset) == 3) {
- if (strcmp(buf, "r-xp") == 0) {
- fclose(f);
- return start - offset;
- }
- }
-
- fclose(f);
- return -EINVAL;
-}
+/* this is how USDT semaphore is actually defined, except volatile modifier */
+volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
void test_attach_probe(void)
{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
int duration = 0;
struct bpf_link *kprobe_link, *kretprobe_link;
struct bpf_link *uprobe_link, *uretprobe_link;
struct test_attach_probe* skel;
size_t uprobe_offset;
- ssize_t base_addr;
+ ssize_t base_addr, ref_ctr_offset;
base_addr = get_base_addr();
if (CHECK(base_addr < 0, "get_base_addr",
"failed to find base addr: %zd", base_addr))
return;
- uprobe_offset = get_offset((size_t)&get_base_addr, base_addr);
+ uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+
+ ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr);
+ if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
+ return;
skel = test_attach_probe__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
@@ -96,20 +45,28 @@ void test_attach_probe(void)
goto cleanup;
skel->links.handle_kretprobe = kretprobe_link;
- uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
- false /* retprobe */,
- 0 /* self pid */,
- "/proc/self/exe",
- uprobe_offset);
+ ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before");
+
+ uprobe_opts.retprobe = false;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
+ 0 /* self pid */,
+ "/proc/self/exe",
+ uprobe_offset,
+ &uprobe_opts);
if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
goto cleanup;
skel->links.handle_uprobe = uprobe_link;
- uretprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uretprobe,
- true /* retprobe */,
- -1 /* any pid */,
- "/proc/self/exe",
- uprobe_offset);
+ ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after");
+
+ /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */
+ uprobe_opts.retprobe = true;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
+ -1 /* any pid */,
+ "/proc/self/exe",
+ uprobe_offset, &uprobe_opts);
if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe"))
goto cleanup;
skel->links.handle_uretprobe = uretprobe_link;
@@ -136,4 +93,5 @@ void test_attach_probe(void)
cleanup:
test_attach_probe__destroy(skel);
+ ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_cleanup");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
new file mode 100644
index 000000000000..5eea3c3a40fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <test_progs.h>
+#include "test_bpf_cookie.skel.h"
+
+static void kprobe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
+
+ /* attach two kprobes */
+ opts.bpf_cookie = 0x1;
+ opts.retprobe = false;
+ link1 = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x2;
+ opts.retprobe = false;
+ link2 = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* attach two kretprobes */
+ opts.bpf_cookie = 0x10;
+ opts.retprobe = true;
+ retlink1 = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(retlink1, "retlink1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x20;
+ opts.retprobe = true;
+ retlink2 = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(retlink2, "retlink2"))
+ goto cleanup;
+
+ /* trigger kprobe && kretprobe */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->kprobe_res, 0x1 | 0x2, "kprobe_res");
+ ASSERT_EQ(skel->bss->kretprobe_res, 0x10 | 0x20, "kretprobe_res");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(retlink1);
+ bpf_link__destroy(retlink2);
+}
+
+static void uprobe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
+ size_t uprobe_offset;
+ ssize_t base_addr;
+
+ base_addr = get_base_addr();
+ uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+
+ /* attach two uprobes */
+ opts.bpf_cookie = 0x100;
+ opts.retprobe = false;
+ link1 = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, 0 /* self pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x200;
+ opts.retprobe = false;
+ link2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, -1 /* any pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* attach two uretprobes */
+ opts.bpf_cookie = 0x1000;
+ opts.retprobe = true;
+ retlink1 = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, -1 /* any pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(retlink1, "retlink1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x2000;
+ opts.retprobe = true;
+ retlink2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, 0 /* self pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(retlink2, "retlink2"))
+ goto cleanup;
+
+ /* trigger uprobe && uretprobe */
+ get_base_addr();
+
+ ASSERT_EQ(skel->bss->uprobe_res, 0x100 | 0x200, "uprobe_res");
+ ASSERT_EQ(skel->bss->uretprobe_res, 0x1000 | 0x2000, "uretprobe_res");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(retlink1);
+ bpf_link__destroy(retlink2);
+}
+
+static void tp_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tracepoint_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL, *link3 = NULL;
+
+ /* attach first tp prog */
+ opts.bpf_cookie = 0x10000;
+ link1 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp1,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ /* attach second tp prog */
+ opts.bpf_cookie = 0x20000;
+ link2 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp2,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* trigger tracepoints */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->tp_res, 0x10000 | 0x20000, "tp_res1");
+
+ /* now we detach first prog and will attach third one, which causes
+ * two internal calls to bpf_prog_array_copy(), shuffling
+ * bpf_prog_array_items around. We test here that we don't lose track
+ * of associated bpf_cookies.
+ */
+ bpf_link__destroy(link1);
+ link1 = NULL;
+ kern_sync_rcu();
+ skel->bss->tp_res = 0;
+
+ /* attach third tp prog */
+ opts.bpf_cookie = 0x40000;
+ link3 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp3,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link3, "link3"))
+ goto cleanup;
+
+ /* trigger tracepoints */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->tp_res, 0x20000 | 0x40000, "tp_res2");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(link3);
+}
+
+static void burn_cpu(void)
+{
+ volatile int j = 0;
+ cpu_set_t cpu_set;
+ int i, err;
+
+ /* generate some branches on cpu 0 */
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ ASSERT_OK(err, "set_thread_affinity");
+
+ /* spin the loop for a while (random high number) */
+ for (i = 0; i < 1000000; ++i)
+ ++j;
+}
+
+static void pe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, opts);
+ struct bpf_link *link = NULL;
+ struct perf_event_attr attr;
+ int pfd = -1;
+
+ /* create perf event */
+ memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 4000;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (!ASSERT_GE(pfd, 0, "perf_fd"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x100000;
+ link = bpf_program__attach_perf_event_opts(skel->progs.handle_pe, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "link1"))
+ goto cleanup;
+
+ burn_cpu(); /* trigger BPF prog */
+
+ ASSERT_EQ(skel->bss->pe_res, 0x100000, "pe_res1");
+
+ /* prevent bpf_link__destroy() closing pfd itself */
+ bpf_link__disconnect(link);
+ /* close BPF link's FD explicitly */
+ close(bpf_link__fd(link));
+ /* free up memory used by struct bpf_link */
+ bpf_link__destroy(link);
+ link = NULL;
+ kern_sync_rcu();
+ skel->bss->pe_res = 0;
+
+ opts.bpf_cookie = 0x200000;
+ link = bpf_program__attach_perf_event_opts(skel->progs.handle_pe, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "link2"))
+ goto cleanup;
+
+ burn_cpu(); /* trigger BPF prog */
+
+ ASSERT_EQ(skel->bss->pe_res, 0x200000, "pe_res2");
+
+cleanup:
+ close(pfd);
+ bpf_link__destroy(link);
+}
+
+void test_bpf_cookie(void)
+{
+ struct test_bpf_cookie *skel;
+
+ skel = test_bpf_cookie__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_tid = syscall(SYS_gettid);
+
+ if (test__start_subtest("kprobe"))
+ kprobe_subtest(skel);
+ if (test__start_subtest("uprobe"))
+ uprobe_subtest(skel);
+ if (test__start_subtest("tracepoint"))
+ tp_subtest(skel);
+ if (test__start_subtest("perf_event"))
+ pe_subtest(skel);
+
+ test_bpf_cookie__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 1f1aade56504..77ac24b191d4 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -13,6 +13,7 @@
#include "bpf_iter_tcp6.skel.h"
#include "bpf_iter_udp4.skel.h"
#include "bpf_iter_udp6.skel.h"
+#include "bpf_iter_unix.skel.h"
#include "bpf_iter_test_kern1.skel.h"
#include "bpf_iter_test_kern2.skel.h"
#include "bpf_iter_test_kern3.skel.h"
@@ -313,6 +314,19 @@ static void test_udp6(void)
bpf_iter_udp6__destroy(skel);
}
+static void test_unix(void)
+{
+ struct bpf_iter_unix *skel;
+
+ skel = bpf_iter_unix__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_unix__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_unix);
+
+ bpf_iter_unix__destroy(skel);
+}
+
/* The expected string is less than 16 bytes */
static int do_read_with_fd(int iter_fd, const char *expected,
bool read_one_char)
@@ -1255,6 +1269,8 @@ void test_bpf_iter(void)
test_udp4();
if (test__start_subtest("udp6"))
test_udp6();
+ if (test__start_subtest("unix"))
+ test_unix();
if (test__start_subtest("anon"))
test_anon_iter(false);
if (test__start_subtest("anon-read-one-char"))
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
new file mode 100644
index 000000000000..85babb0487b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "bpf_dctcp.skel.h"
+#include "bpf_cubic.skel.h"
+#include "bpf_iter_setsockopt.skel.h"
+
+static int create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
+ return -1;
+
+ return 0;
+}
+
+static unsigned int set_bpf_cubic(int *fds, unsigned int nr_fds)
+{
+ unsigned int i;
+
+ for (i = 0; i < nr_fds; i++) {
+ if (setsockopt(fds[i], SOL_TCP, TCP_CONGESTION, "bpf_cubic",
+ sizeof("bpf_cubic")))
+ return i;
+ }
+
+ return nr_fds;
+}
+
+static unsigned int check_bpf_dctcp(int *fds, unsigned int nr_fds)
+{
+ char tcp_cc[16];
+ socklen_t optlen = sizeof(tcp_cc);
+ unsigned int i;
+
+ for (i = 0; i < nr_fds; i++) {
+ if (getsockopt(fds[i], SOL_TCP, TCP_CONGESTION,
+ tcp_cc, &optlen) ||
+ strcmp(tcp_cc, "bpf_dctcp"))
+ return i;
+ }
+
+ return nr_fds;
+}
+
+static int *make_established(int listen_fd, unsigned int nr_est,
+ int **paccepted_fds)
+{
+ int *est_fds, *accepted_fds;
+ unsigned int i;
+
+ est_fds = malloc(sizeof(*est_fds) * nr_est);
+ if (!est_fds)
+ return NULL;
+
+ accepted_fds = malloc(sizeof(*accepted_fds) * nr_est);
+ if (!accepted_fds) {
+ free(est_fds);
+ return NULL;
+ }
+
+ for (i = 0; i < nr_est; i++) {
+ est_fds[i] = connect_to_fd(listen_fd, 0);
+ if (est_fds[i] == -1)
+ break;
+ if (set_bpf_cubic(&est_fds[i], 1) != 1) {
+ close(est_fds[i]);
+ break;
+ }
+
+ accepted_fds[i] = accept(listen_fd, NULL, 0);
+ if (accepted_fds[i] == -1) {
+ close(est_fds[i]);
+ break;
+ }
+ }
+
+ if (!ASSERT_EQ(i, nr_est, "create established fds")) {
+ free_fds(accepted_fds, i);
+ free_fds(est_fds, i);
+ return NULL;
+ }
+
+ *paccepted_fds = accepted_fds;
+ return est_fds;
+}
+
+static unsigned short get_local_port(int fd)
+{
+ struct sockaddr_in6 addr;
+ socklen_t addrlen = sizeof(addr);
+
+ if (!getsockname(fd, &addr, &addrlen))
+ return ntohs(addr.sin6_port);
+
+ return 0;
+}
+
+static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel,
+ bool random_retry)
+{
+ int *reuse_listen_fds = NULL, *accepted_fds = NULL, *est_fds = NULL;
+ unsigned int nr_reuse_listens = 256, nr_est = 256;
+ int err, iter_fd = -1, listen_fd = -1;
+ char buf;
+
+ /* Prepare non-reuseport listen_fd */
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(listen_fd, 0, "start_server"))
+ return;
+ if (!ASSERT_EQ(set_bpf_cubic(&listen_fd, 1), 1,
+ "set listen_fd to cubic"))
+ goto done;
+ iter_skel->bss->listen_hport = get_local_port(listen_fd);
+ if (!ASSERT_NEQ(iter_skel->bss->listen_hport, 0,
+ "get_local_port(listen_fd)"))
+ goto done;
+
+ /* Connect to non-reuseport listen_fd */
+ est_fds = make_established(listen_fd, nr_est, &accepted_fds);
+ if (!ASSERT_OK_PTR(est_fds, "create established"))
+ goto done;
+
+ /* Prepare reuseport listen fds */
+ reuse_listen_fds = start_reuseport_server(AF_INET6, SOCK_STREAM,
+ "::1", 0, 0,
+ nr_reuse_listens);
+ if (!ASSERT_OK_PTR(reuse_listen_fds, "start_reuseport_server"))
+ goto done;
+ if (!ASSERT_EQ(set_bpf_cubic(reuse_listen_fds, nr_reuse_listens),
+ nr_reuse_listens, "set reuse_listen_fds to cubic"))
+ goto done;
+ iter_skel->bss->reuse_listen_hport = get_local_port(reuse_listen_fds[0]);
+ if (!ASSERT_NEQ(iter_skel->bss->reuse_listen_hport, 0,
+ "get_local_port(reuse_listen_fds[0])"))
+ goto done;
+
+ /* Run bpf tcp iter to switch from bpf_cubic to bpf_dctcp */
+ iter_skel->bss->random_retry = random_retry;
+ iter_fd = bpf_iter_create(bpf_link__fd(iter_skel->links.change_tcp_cc));
+ if (!ASSERT_GE(iter_fd, 0, "create iter_fd"))
+ goto done;
+
+ while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+ errno == EAGAIN)
+ ;
+ if (!ASSERT_OK(err, "read iter error"))
+ goto done;
+
+ /* Check reuseport listen fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(reuse_listen_fds, nr_reuse_listens),
+ nr_reuse_listens,
+ "check reuse_listen_fds dctcp");
+
+ /* Check non reuseport listen fd for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(&listen_fd, 1), 1,
+ "check listen_fd dctcp");
+
+ /* Check established fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(est_fds, nr_est), nr_est,
+ "check est_fds dctcp");
+
+ /* Check accepted fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(accepted_fds, nr_est), nr_est,
+ "check accepted_fds dctcp");
+
+done:
+ if (iter_fd != -1)
+ close(iter_fd);
+ if (listen_fd != -1)
+ close(listen_fd);
+ free_fds(reuse_listen_fds, nr_reuse_listens);
+ free_fds(accepted_fds, nr_est);
+ free_fds(est_fds, nr_est);
+}
+
+void test_bpf_iter_setsockopt(void)
+{
+ struct bpf_iter_setsockopt *iter_skel = NULL;
+ struct bpf_cubic *cubic_skel = NULL;
+ struct bpf_dctcp *dctcp_skel = NULL;
+ struct bpf_link *cubic_link = NULL;
+ struct bpf_link *dctcp_link = NULL;
+
+ if (create_netns())
+ return;
+
+ /* Load iter_skel */
+ iter_skel = bpf_iter_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(iter_skel, "iter_skel"))
+ return;
+ iter_skel->links.change_tcp_cc = bpf_program__attach_iter(iter_skel->progs.change_tcp_cc, NULL);
+ if (!ASSERT_OK_PTR(iter_skel->links.change_tcp_cc, "attach iter"))
+ goto done;
+
+ /* Load bpf_cubic */
+ cubic_skel = bpf_cubic__open_and_load();
+ if (!ASSERT_OK_PTR(cubic_skel, "cubic_skel"))
+ goto done;
+ cubic_link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
+ if (!ASSERT_OK_PTR(cubic_link, "cubic_link"))
+ goto done;
+
+ /* Load bpf_dctcp */
+ dctcp_skel = bpf_dctcp__open_and_load();
+ if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
+ goto done;
+ dctcp_link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+ if (!ASSERT_OK_PTR(dctcp_link, "dctcp_link"))
+ goto done;
+
+ do_bpf_iter_setsockopt(iter_skel, true);
+ do_bpf_iter_setsockopt(iter_skel, false);
+
+done:
+ bpf_link__destroy(cubic_link);
+ bpf_link__destroy(dctcp_link);
+ bpf_cubic__destroy(cubic_skel);
+ bpf_dctcp__destroy(dctcp_skel);
+ bpf_iter_setsockopt__destroy(iter_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index efe1e979affb..94e03df69d71 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -4,37 +4,22 @@
#include <linux/err.h>
#include <netinet/tcp.h>
#include <test_progs.h>
+#include "network_helpers.h"
#include "bpf_dctcp.skel.h"
#include "bpf_cubic.skel.h"
#include "bpf_tcp_nogpl.skel.h"
+#include "bpf_dctcp_release.skel.h"
#define min(a, b) ((a) < (b) ? (a) : (b))
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
static const unsigned int total_bytes = 10 * 1024 * 1024;
-static const struct timeval timeo_sec = { .tv_sec = 10 };
-static const size_t timeo_optlen = sizeof(timeo_sec);
static int expected_stg = 0xeB9F;
static int stop, duration;
-static int settimeo(int fd)
-{
- int err;
-
- err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
- timeo_optlen);
- if (CHECK(err == -1, "setsockopt(fd, SO_RCVTIMEO)", "errno:%d\n",
- errno))
- return -1;
-
- err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec,
- timeo_optlen);
- if (CHECK(err == -1, "setsockopt(fd, SO_SNDTIMEO)", "errno:%d\n",
- errno))
- return -1;
-
- return 0;
-}
-
static int settcpca(int fd, const char *tcp_ca)
{
int err;
@@ -61,7 +46,7 @@ static void *server(void *arg)
goto done;
}
- if (settimeo(fd)) {
+ if (settimeo(fd, 0)) {
err = -errno;
goto done;
}
@@ -114,7 +99,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
}
if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) ||
- settimeo(lfd) || settimeo(fd))
+ settimeo(lfd, 0) || settimeo(fd, 0))
goto done;
/* bind, listen and start server thread to accept */
@@ -267,6 +252,77 @@ static void test_invalid_license(void)
libbpf_set_print(old_print_fn);
}
+static void test_dctcp_fallback(void)
+{
+ int err, lfd = -1, cli_fd = -1, srv_fd = -1;
+ struct network_helper_opts opts = {
+ .cc = "cubic",
+ };
+ struct bpf_dctcp *dctcp_skel;
+ struct bpf_link *link = NULL;
+ char srv_cc[16];
+ socklen_t cc_len = sizeof(srv_cc);
+
+ dctcp_skel = bpf_dctcp__open();
+ if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
+ return;
+ strcpy(dctcp_skel->rodata->fallback, "cubic");
+ if (!ASSERT_OK(bpf_dctcp__load(dctcp_skel), "bpf_dctcp__load"))
+ goto done;
+
+ link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+ if (!ASSERT_OK_PTR(link, "dctcp link"))
+ goto done;
+
+ lfd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(lfd, 0, "lfd") ||
+ !ASSERT_OK(settcpca(lfd, "bpf_dctcp"), "lfd=>bpf_dctcp"))
+ goto done;
+
+ cli_fd = connect_to_fd_opts(lfd, &opts);
+ if (!ASSERT_GE(cli_fd, 0, "cli_fd"))
+ goto done;
+
+ srv_fd = accept(lfd, NULL, 0);
+ if (!ASSERT_GE(srv_fd, 0, "srv_fd"))
+ goto done;
+ ASSERT_STREQ(dctcp_skel->bss->cc_res, "cubic", "cc_res");
+ ASSERT_EQ(dctcp_skel->bss->tcp_cdg_res, -ENOTSUPP, "tcp_cdg_res");
+
+ err = getsockopt(srv_fd, SOL_TCP, TCP_CONGESTION, srv_cc, &cc_len);
+ if (!ASSERT_OK(err, "getsockopt(srv_fd, TCP_CONGESTION)"))
+ goto done;
+ ASSERT_STREQ(srv_cc, "cubic", "srv_fd cc");
+
+done:
+ bpf_link__destroy(link);
+ bpf_dctcp__destroy(dctcp_skel);
+ if (lfd != -1)
+ close(lfd);
+ if (srv_fd != -1)
+ close(srv_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+}
+
+static void test_rel_setsockopt(void)
+{
+ struct bpf_dctcp_release *rel_skel;
+ libbpf_print_fn_t old_print_fn;
+
+ err_str = "unknown func bpf_setsockopt";
+ found = false;
+
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+ rel_skel = bpf_dctcp_release__open_and_load();
+ libbpf_set_print(old_print_fn);
+
+ ASSERT_ERR_PTR(rel_skel, "rel_skel");
+ ASSERT_TRUE(found, "expected_err_msg");
+
+ bpf_dctcp_release__destroy(rel_skel);
+}
+
void test_bpf_tcp_ca(void)
{
if (test__start_subtest("dctcp"))
@@ -275,4 +331,8 @@ void test_bpf_tcp_ca(void)
test_cubic();
if (test__start_subtest("invalid_license"))
test_invalid_license();
+ if (test__start_subtest("dctcp_fallback"))
+ test_dctcp_fallback();
+ if (test__start_subtest("rel_setsockopt"))
+ test_rel_setsockopt();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 857e3f26086f..649f87382c8d 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -4350,7 +4350,8 @@ static void do_test_file(unsigned int test_num)
goto done;
}
- err = btf__get_from_id(info.btf_id, &btf);
+ btf = btf__load_from_kernel_by_id(info.btf_id);
+ err = libbpf_get_error(btf);
if (CHECK(err, "cannot get btf from kernel, err: %d", err))
goto done;
@@ -4386,6 +4387,7 @@ skip:
fprintf(stderr, "OK");
done:
+ btf__free(btf);
free(func_info);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 1b90e684ff13..52ccf0cf35e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -232,7 +232,593 @@ err_out:
btf__free(btf);
}
+#define STRSIZE 4096
+
+static void btf_dump_snprintf(void *ctx, const char *fmt, va_list args)
+{
+ char *s = ctx, new[STRSIZE];
+
+ vsnprintf(new, STRSIZE, fmt, args);
+ if (strlen(s) < STRSIZE)
+ strncat(s, new, STRSIZE - strlen(s) - 1);
+}
+
+static int btf_dump_data(struct btf *btf, struct btf_dump *d,
+ char *name, char *prefix, __u64 flags, void *ptr,
+ size_t ptr_sz, char *str, const char *expected_val)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ size_t type_sz;
+ __s32 type_id;
+ int ret = 0;
+
+ if (flags & BTF_F_COMPACT)
+ opts.compact = true;
+ if (flags & BTF_F_NONAME)
+ opts.skip_names = true;
+ if (flags & BTF_F_ZERO)
+ opts.emit_zeroes = true;
+ if (prefix) {
+ ASSERT_STRNEQ(name, prefix, strlen(prefix),
+ "verify prefix match");
+ name += strlen(prefix) + 1;
+ }
+ type_id = btf__find_by_name(btf, name);
+ if (!ASSERT_GE(type_id, 0, "find type id"))
+ return -ENOENT;
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+ ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, &opts);
+ if (type_sz <= ptr_sz) {
+ if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
+ return -EINVAL;
+ } else {
+ if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG"))
+ return -EINVAL;
+ }
+ if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match"))
+ return -EFAULT;
+ return 0;
+}
+
+#define TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \
+ _expected, ...) \
+ do { \
+ char __ptrtype[64] = #_type; \
+ char *_ptrtype = (char *)__ptrtype; \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _ptrtype, _prefix, _flags, \
+ _ptr, sizeof(_type), _str, \
+ _expected); \
+ } while (0)
+
+/* Use where expected data string matches its stringified declaration */
+#define TEST_BTF_DUMP_DATA_C(_b, _d, _prefix, _str, _type, _flags, \
+ ...) \
+ TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \
+ "(" #_type ")" #__VA_ARGS__, __VA_ARGS__)
+
+/* overflow test; pass typesize < expected type size, ensure E2BIG returned */
+#define TEST_BTF_DUMP_DATA_OVER(_b, _d, _prefix, _str, _type, _type_sz, \
+ _expected, ...) \
+ do { \
+ char __ptrtype[64] = #_type; \
+ char *_ptrtype = (char *)__ptrtype; \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _ptrtype, _prefix, 0, \
+ _ptr, _type_sz, _str, _expected); \
+ } while (0)
+
+#define TEST_BTF_DUMP_VAR(_b, _d, _prefix, _str, _var, _type, _flags, \
+ _expected, ...) \
+ do { \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _var, _prefix, _flags, \
+ _ptr, sizeof(_type), _str, \
+ _expected); \
+ } while (0)
+
+static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+#ifdef __SIZEOF_INT128__
+ __int128 i = 0xffffffffffffffff;
+
+ /* this dance is required because we cannot directly initialize
+ * a 128-bit value to anything larger than a 64-bit value.
+ */
+ i = (i << 64) | (i - 1);
+#endif
+ /* simple int */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, 1234);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "1234", 1234);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)1234", 1234);
+
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT, "(int)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_ZERO,
+ "(int)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, -4567);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "-4567", -4567);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)-4567", -4567);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, int, sizeof(int)-1, "", 1);
+
+#ifdef __SIZEOF_INT128__
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128, BTF_F_COMPACT,
+ "(__int128)0xffffffffffffffff",
+ 0xffffffffffffffff);
+ ASSERT_OK(btf_dump_data(btf, d, "__int128", NULL, 0, &i, 16, str,
+ "(__int128)0xfffffffffffffffffffffffffffffffe"),
+ "dump __int128");
+#endif
+}
+
+static void test_btf_dump_float_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ float t1 = 1.234567;
+ float t2 = -1.234567;
+ float t3 = 0.0;
+ double t4 = 5.678912;
+ double t5 = -5.678912;
+ double t6 = 0.0;
+ long double t7 = 9.876543;
+ long double t8 = -9.876543;
+ long double t9 = 0.0;
+
+ /* since the kernel does not likely have any float types in its BTF, we
+ * will need to add some of various sizes.
+ */
+
+ ASSERT_GT(btf__add_float(btf, "test_float", 4), 0, "add float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t1, 4, str,
+ "(test_float)1.234567"), "dump float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t2, 4, str,
+ "(test_float)-1.234567"), "dump float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t3, 4, str,
+ "(test_float)0.000000"), "dump float");
+
+ ASSERT_GT(btf__add_float(btf, "test_double", 8), 0, "add_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t4, 8, str,
+ "(test_double)5.678912"), "dump double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t5, 8, str,
+ "(test_double)-5.678912"), "dump double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t6, 8, str,
+ "(test_double)0.000000"), "dump double");
+
+ ASSERT_GT(btf__add_float(btf, "test_long_double", 16), 0, "add long double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t7, 16,
+ str, "(test_long_double)9.876543"),
+ "dump long_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t8, 16,
+ str, "(test_long_double)-9.876543"),
+ "dump long_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t9, 16,
+ str, "(test_long_double)0.000000"),
+ "dump long_double");
+}
+
+static void test_btf_dump_char_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* simple char */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, char, BTF_F_COMPACT, 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+ "100", 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)100", 100);
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT,
+ "(char)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_ZERO,
+ "(char)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)0", 0);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, char, sizeof(char)-1, "", 100);
+}
+
+static void test_btf_dump_typedef_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* simple typedef */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, uint64_t, BTF_F_COMPACT, 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+ "1", 1);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)1", 1);
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT, "(u64)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_ZERO,
+ "(u64)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)0", 0);
+
+ /* typedef struct */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, atomic_t, BTF_F_COMPACT,
+ {.counter = (int)1,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+ "{1,}", { .counter = 1 });
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+" .counter = (int)1,\n"
+"}",
+ {.counter = 1,});
+ /* typedef with 0 value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT, "(atomic_t){}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+ "{}", {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+"}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_ZERO,
+ "(atomic_t){.counter = (int)0,}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "{0,}", {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_ZERO,
+"(atomic_t){\n"
+" .counter = (int)0,\n"
+"}",
+ { .counter = 0,});
+
+ /* overflow should show type but not value since it overflows */
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, atomic_t, sizeof(atomic_t)-1,
+ "(atomic_t){\n", { .counter = 1});
+}
+
+static void test_btf_dump_enum_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* enum where enum value does (and does not) exist */
+ TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+ "(enum bpf_cmd)BPF_MAP_CREATE", 0);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+ "(enum bpf_cmd)BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "BPF_MAP_CREATE", 0);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_ZERO,
+ "(enum bpf_cmd)BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "BPF_MAP_CREATE", BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, 2000);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "2000", 2000);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+ "(enum bpf_cmd)2000", 2000);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "enum", str, enum bpf_cmd,
+ sizeof(enum bpf_cmd) - 1, "", BPF_MAP_CREATE);
+}
+
+static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ char zero_data[512] = { };
+ char type_data[512];
+ void *fops = type_data;
+ void *skb = type_data;
+ size_t type_sz;
+ __s32 type_id;
+ char *cmpstr;
+ int ret;
+
+ memset(type_data, 255, sizeof(type_data));
+
+ /* simple struct */
+ TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+ {.name_off = (__u32)3,.val = (__s32)-1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{3,-1,}",
+ { .name_off = 3, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+" .name_off = (__u32)3,\n"
+" .val = (__s32)-1,\n"
+"}",
+ { .name_off = 3, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{-1,}",
+ { .name_off = 0, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "{0,-1,}",
+ { .name_off = 0, .val = -1,});
+ /* empty struct should be printed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+ "(struct btf_enum){}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+"}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_ZERO,
+ "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_ZERO,
+"(struct btf_enum){\n"
+" .name_off = (__u32)0,\n"
+" .val = (__s32)0,\n"
+"}",
+ { .name_off = 0, .val = 0,});
+
+ /* struct with pointers */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+ "(struct list_head){.next = (struct list_head *)0x1,}",
+ { .next = (struct list_head *)1 });
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+" .next = (struct list_head *)0x1,\n"
+"}",
+ { .next = (struct list_head *)1 });
+ /* NULL pointer should not be displayed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+ "(struct list_head){}",
+ { .next = (struct list_head *)0 });
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+"}",
+ { .next = (struct list_head *)0 });
+
+ /* struct with function pointers */
+ type_id = btf__find_by_name(btf, "file_operations");
+ if (ASSERT_GT(type_id, 0, "find type id")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ ret = btf_dump__dump_type_data(d, type_id, fops, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping file_operations");
+ cmpstr =
+"(struct file_operations){\n"
+" .owner = (struct module *)0xffffffffffffffff,\n"
+" .llseek = (loff_t (*)(struct file *, loff_t, int))0xffffffffffffffff,";
+
+ ASSERT_STRNEQ(str, cmpstr, strlen(cmpstr), "file_operations");
+ }
+
+ /* struct with char array */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){.name = (char[16])['f','o','o',],}",
+ { .name = "foo",});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{['f','o','o',],}",
+ {.name = "foo",});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, 0,
+"(struct bpf_prog_info){\n"
+" .name = (char[16])[\n"
+" 'f',\n"
+" 'o',\n"
+" 'o',\n"
+" ],\n"
+"}",
+ {.name = "foo",});
+ /* leading null char means do not display string */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){}",
+ {.name = {'\0', 'f', 'o', 'o'}});
+ /* handle non-printable characters */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){.name = (char[16])[1,2,3,],}",
+ { .name = {1, 2, 3, 0}});
+
+ /* struct with non-char array */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+ "(struct __sk_buff){.cb = (__u32[5])[1,2,3,4,5,],}",
+ { .cb = {1, 2, 3, 4, 5,},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{[1,2,3,4,5,],}",
+ { .cb = { 1, 2, 3, 4, 5},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+" .cb = (__u32[5])[\n"
+" 1,\n"
+" 2,\n"
+" 3,\n"
+" 4,\n"
+" 5,\n"
+" ],\n"
+"}",
+ { .cb = { 1, 2, 3, 4, 5},});
+ /* For non-char, arrays, show non-zero values only */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+ "(struct __sk_buff){.cb = (__u32[5])[0,0,1,0,0,],}",
+ { .cb = { 0, 0, 1, 0, 0},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+" .cb = (__u32[5])[\n"
+" 0,\n"
+" 0,\n"
+" 1,\n"
+" 0,\n"
+" 0,\n"
+" ],\n"
+"}",
+ { .cb = { 0, 0, 1, 0, 0},});
+
+ /* struct with bitfields */
+ TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+ {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{1,0x2,0x3,4,5,}",
+ { .code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4,
+ .imm = 5,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, 0,
+"(struct bpf_insn){\n"
+" .code = (__u8)1,\n"
+" .dst_reg = (__u8)0x2,\n"
+" .src_reg = (__u8)0x3,\n"
+" .off = (__s16)4,\n"
+" .imm = (__s32)5,\n"
+"}",
+ {.code = 1, .dst_reg = 2, .src_reg = 3, .off = 4, .imm = 5});
+
+ /* zeroed bitfields should not be displayed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+ "(struct bpf_insn){.dst_reg = (__u8)0x1,}",
+ { .code = 0, .dst_reg = 1});
+
+ /* struct with enum bitfield */
+ type_id = btf__find_by_name(btf, "fs_context");
+ if (ASSERT_GT(type_id, 0, "find fs_context")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ opts.emit_zeroes = true;
+ ret = btf_dump__dump_type_data(d, type_id, zero_data, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping fs_context");
+
+ ASSERT_NEQ(strstr(str, "FS_CONTEXT_FOR_MOUNT"), NULL,
+ "bitfield value not present");
+ }
+
+ /* struct with nested anon union */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_sock_ops, BTF_F_COMPACT,
+ "(struct bpf_sock_ops){.op = (__u32)1,(union){.args = (__u32[4])[1,2,3,4,],.reply = (__u32)1,.replylong = (__u32[4])[1,2,3,4,],},}",
+ { .op = 1, .args = { 1, 2, 3, 4}});
+
+ /* union with nested struct */
+ TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT,
+ "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},}",
+ { .map = { .map_fd = 1 }});
+
+ /* struct skb with nested structs/unions; because type output is so
+ * complex, we don't do a string comparison, just verify we return
+ * the type size as the amount of data displayed.
+ */
+ type_id = btf__find_by_name(btf, "sk_buff");
+ if (ASSERT_GT(type_id, 0, "find struct sk_buff")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ ret = btf_dump__dump_type_data(d, type_id, skb, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping sk_buff");
+ }
+
+ /* overflow bpf_sock_ops struct with final element nonzero/zero.
+ * Regardless of the value of the final field, we don't have all the
+ * data we need to display it, so we should trigger an overflow.
+ * In other words oveflow checking should trump "is field zero?"
+ * checks because if we've overflowed, it shouldn't matter what the
+ * field is - we can't trust its value so shouldn't display it.
+ */
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+ sizeof(struct bpf_sock_ops) - 1,
+ "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+ { .op = 1, .skb_tcp_flags = 2});
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+ sizeof(struct bpf_sock_ops) - 1,
+ "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+ { .op = 1, .skb_tcp_flags = 0});
+}
+
+static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT,
+ "int cpu_number = (int)100", 100);
+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT,
+ "static int cpu_profile_flip = (int)2", 2);
+}
+
+static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str,
+ const char *name, const char *expected_val,
+ void *data, size_t data_sz)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ int ret = 0, cmp;
+ size_t secsize;
+ __s32 type_id;
+
+ opts.compact = true;
+
+ type_id = btf__find_by_name(btf, name);
+ if (!ASSERT_GT(type_id, 0, "find type id"))
+ return;
+
+ secsize = btf__resolve_size(btf, type_id);
+ ASSERT_EQ(secsize, 0, "verify section size");
+
+ str[0] = '\0';
+ ret = btf_dump__dump_type_data(d, type_id, data, data_sz, &opts);
+ ASSERT_EQ(ret, 0, "unexpected return value");
+
+ cmp = strcmp(str, expected_val);
+ ASSERT_EQ(cmp, 0, "ensure expected/actual match");
+}
+
+static void test_btf_dump_datasec_data(char *str)
+{
+ struct btf *btf = btf__parse("xdping_kern.o", NULL);
+ struct btf_dump_opts opts = { .ctx = str };
+ char license[4] = "GPL";
+ struct btf_dump *d;
+
+ if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found"))
+ return;
+
+ d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ return;
+
+ test_btf_datasec(btf, d, str, "license",
+ "SEC(\"license\") char[4] _license = (char[4])['G','P','L',];",
+ license, sizeof(license));
+}
+
void test_btf_dump() {
+ char str[STRSIZE];
+ struct btf_dump_opts opts = { .ctx = str };
+ struct btf_dump *d;
+ struct btf *btf;
int i;
for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) {
@@ -245,4 +831,33 @@ void test_btf_dump() {
}
if (test__start_subtest("btf_dump: incremental"))
test_btf_dump_incremental();
+
+ btf = libbpf_find_kernel_btf();
+ if (!ASSERT_OK_PTR(btf, "no kernel BTF found"))
+ return;
+
+ d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ return;
+
+ /* Verify type display for various types. */
+ if (test__start_subtest("btf_dump: int_data"))
+ test_btf_dump_int_data(btf, d, str);
+ if (test__start_subtest("btf_dump: float_data"))
+ test_btf_dump_float_data(btf, d, str);
+ if (test__start_subtest("btf_dump: char_data"))
+ test_btf_dump_char_data(btf, d, str);
+ if (test__start_subtest("btf_dump: typedef_data"))
+ test_btf_dump_typedef_data(btf, d, str);
+ if (test__start_subtest("btf_dump: enum_data"))
+ test_btf_dump_enum_data(btf, d, str);
+ if (test__start_subtest("btf_dump: struct_data"))
+ test_btf_dump_struct_data(btf, d, str);
+ if (test__start_subtest("btf_dump: var_data"))
+ test_btf_dump_var_data(btf, d, str);
+ btf_dump__free(d);
+ btf__free(btf);
+
+ if (test__start_subtest("btf_dump: datasec_data"))
+ test_btf_dump_datasec_data(str);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_module.c b/tools/testing/selftests/bpf/prog_tests/btf_module.c
new file mode 100644
index 000000000000..2239d1fe0332
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_module.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static const char *module_name = "bpf_testmod";
+static const char *symbol_name = "bpf_testmod_test_read";
+
+void test_btf_module()
+{
+ struct btf *vmlinux_btf, *module_btf;
+ __s32 type_id;
+
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
+ return;
+
+ module_btf = btf__load_module_btf(module_name, vmlinux_btf);
+ if (!ASSERT_OK_PTR(module_btf, "could not load module BTF"))
+ goto cleanup;
+
+ type_id = btf__find_by_name(module_btf, symbol_name);
+ ASSERT_GT(type_id, 0, "func not found");
+
+cleanup:
+ btf__free(module_btf);
+ btf__free(vmlinux_btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
index 981c251453d9..3d4b2a358d47 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
@@ -53,8 +53,8 @@ void test_core_autosize(void)
char btf_file[] = "/tmp/core_autosize.btf.XXXXXX";
int err, fd = -1, zero = 0;
int char_id, short_id, int_id, long_long_id, void_ptr_id, id;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
struct test_core_autosize* skel = NULL;
- struct bpf_object_load_attr load_attr = {};
struct bpf_program *prog;
struct bpf_map *bss_map;
struct btf *btf = NULL;
@@ -125,9 +125,10 @@ void test_core_autosize(void)
fd = -1;
/* open and load BPF program with custom BTF as the kernel BTF */
- skel = test_core_autosize__open();
+ open_opts.btf_custom_path = btf_file;
+ skel = test_core_autosize__open_opts(&open_opts);
if (!ASSERT_OK_PTR(skel, "skel_open"))
- return;
+ goto cleanup;
/* disable handle_signed() for now */
prog = bpf_object__find_program_by_name(skel->obj, "handle_signed");
@@ -135,9 +136,7 @@ void test_core_autosize(void)
goto cleanup;
bpf_program__set_autoload(prog, false);
- load_attr.obj = skel->obj;
- load_attr.target_btf_path = btf_file;
- err = bpf_object__load_xattr(&load_attr);
+ err = bpf_object__load(skel->obj);
if (!ASSERT_OK(err, "prog_load"))
goto cleanup;
@@ -204,14 +203,13 @@ void test_core_autosize(void)
skel = NULL;
/* now re-load with handle_signed() enabled, it should fail loading */
- skel = test_core_autosize__open();
+ open_opts.btf_custom_path = btf_file;
+ skel = test_core_autosize__open_opts(&open_opts);
if (!ASSERT_OK_PTR(skel, "skel_open"))
- return;
+ goto cleanup;
- load_attr.obj = skel->obj;
- load_attr.target_btf_path = btf_file;
- err = bpf_object__load_xattr(&load_attr);
- if (!ASSERT_ERR(err, "bad_prog_load"))
+ err = test_core_autosize__load(skel);
+ if (!ASSERT_ERR(err, "skel_load"))
goto cleanup;
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index d02e064c535f..4739b15b2a97 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -816,7 +816,7 @@ static size_t roundup_page(size_t sz)
void test_core_reloc(void)
{
const size_t mmap_sz = roundup_page(sizeof(struct data));
- struct bpf_object_load_attr load_attr = {};
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
struct core_reloc_test_case *test_case;
const char *tp_name, *probe_name;
int err, i, equal;
@@ -846,9 +846,16 @@ void test_core_reloc(void)
continue;
}
- obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
+ if (test_case->btf_src_file) {
+ err = access(test_case->btf_src_file, R_OK);
+ if (!ASSERT_OK(err, "btf_src_file"))
+ goto cleanup;
+ }
+
+ open_opts.btf_custom_path = test_case->btf_src_file;
+ obj = bpf_object__open_file(test_case->bpf_obj_file, &open_opts);
if (!ASSERT_OK_PTR(obj, "obj_open"))
- continue;
+ goto cleanup;
probe_name = "raw_tracepoint/sys_enter";
tp_name = "sys_enter";
@@ -862,17 +869,7 @@ void test_core_reloc(void)
"prog '%s' not found\n", probe_name))
goto cleanup;
-
- if (test_case->btf_src_file) {
- err = access(test_case->btf_src_file, R_OK);
- if (!ASSERT_OK(err, "btf_src_file"))
- goto cleanup;
- }
-
- load_attr.obj = obj;
- load_attr.log_level = 0;
- load_attr.target_btf_path = test_case->btf_src_file;
- err = bpf_object__load_xattr(&load_attr);
+ err = bpf_object__load(obj);
if (err) {
if (!test_case->fails)
ASSERT_OK(err, "obj_load");
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
new file mode 100644
index 000000000000..02a465f36d59
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "get_func_ip_test.skel.h"
+
+void test_get_func_ip_test(void)
+{
+ struct get_func_ip_test *skel = NULL;
+ __u32 duration = 0, retval;
+ int err, prog_fd;
+
+ skel = get_func_ip_test__open();
+ if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open"))
+ return;
+
+ /* test6 is x86_64 specifc because of the instruction
+ * offset, disabling it for all other archs
+ */
+#ifndef __x86_64__
+ bpf_program__set_autoload(skel->progs.test6, false);
+ bpf_program__set_autoload(skel->progs.test7, false);
+#endif
+
+ err = get_func_ip_test__load(skel);
+ if (!ASSERT_OK(err, "get_func_ip_test__load"))
+ goto cleanup;
+
+ err = get_func_ip_test__attach(skel);
+ if (!ASSERT_OK(err, "get_func_ip_test__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+ NULL, NULL, &retval, &duration);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(retval, 0, "test_run");
+
+ prog_fd = bpf_program__fd(skel->progs.test5);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+ NULL, NULL, &retval, &duration);
+
+ ASSERT_OK(err, "test_run");
+
+ ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
+ ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
+ ASSERT_EQ(skel->bss->test3_result, 1, "test3_result");
+ ASSERT_EQ(skel->bss->test4_result, 1, "test4_result");
+ ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
+#ifdef __x86_64__
+ ASSERT_EQ(skel->bss->test6_result, 1, "test6_result");
+ ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
+#endif
+
+cleanup:
+ get_func_ip_test__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 30a7b9b837bf..9611f2bc50df 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -44,7 +44,7 @@ static void test_subprog(void)
ASSERT_OK(err, "bpf_prog_test_run(test1)");
ASSERT_EQ(retval, 10, "test1-retval");
ASSERT_NEQ(skel->data->active_res, -1, "active_res");
- ASSERT_EQ(skel->data->sk_state, BPF_TCP_CLOSE, "sk_state");
+ ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res");
kfunc_call_test_subprog__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index 67bebd324147..cf3acfa5a91d 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -6,6 +6,7 @@
#include <bpf/btf.h>
#include "test_ksyms_btf.skel.h"
#include "test_ksyms_btf_null_check.skel.h"
+#include "test_ksyms_weak.skel.h"
static int duration;
@@ -81,6 +82,33 @@ static void test_null_check(void)
test_ksyms_btf_null_check__destroy(skel);
}
+static void test_weak_syms(void)
+{
+ struct test_ksyms_weak *skel;
+ struct test_ksyms_weak__data *data;
+ int err;
+
+ skel = test_ksyms_weak__open_and_load();
+ if (CHECK(!skel, "test_ksyms_weak__open_and_load", "failed\n"))
+ return;
+
+ err = test_ksyms_weak__attach(skel);
+ if (CHECK(err, "test_ksyms_weak__attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ data = skel->data;
+ ASSERT_EQ(data->out__existing_typed, 0, "existing typed ksym");
+ ASSERT_NEQ(data->out__existing_typeless, -1, "existing typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typeless, 0, "nonexistent typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typed, 0, "nonexistent typed ksym");
+
+cleanup:
+ test_ksyms_weak__destroy(skel);
+}
+
void test_ksyms_btf(void)
{
int percpu_datasec;
@@ -105,4 +133,7 @@ void test_ksyms_btf(void)
if (test__start_subtest("null_check"))
test_null_check();
+
+ if (test__start_subtest("weak_ksyms"))
+ test_weak_syms();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c
new file mode 100644
index 000000000000..6ede48bde91b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/sysinfo.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "netcnt_prog.skel.h"
+#include "netcnt_common.h"
+
+#define CG_NAME "/netcnt"
+
+void test_netcnt(void)
+{
+ union percpu_net_cnt *percpu_netcnt = NULL;
+ struct bpf_cgroup_storage_key key;
+ int map_fd, percpu_map_fd;
+ struct netcnt_prog *skel;
+ unsigned long packets;
+ union net_cnt netcnt;
+ unsigned long bytes;
+ int cpu, nproc;
+ int cg_fd = -1;
+ char cmd[128];
+
+ skel = netcnt_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load"))
+ return;
+
+ nproc = get_nprocs_conf();
+ percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
+ if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)"))
+ goto err;
+
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup"))
+ goto err;
+
+ skel->links.bpf_nextcnt = bpf_program__attach_cgroup(skel->progs.bpf_nextcnt, cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.bpf_nextcnt,
+ "attach_cgroup(bpf_nextcnt)"))
+ goto err;
+
+ snprintf(cmd, sizeof(cmd), "%s ::1 -A -c 10000 -q > /dev/null", ping_command(AF_INET6));
+ ASSERT_OK(system(cmd), cmd);
+
+ map_fd = bpf_map__fd(skel->maps.netcnt);
+ if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, &key), "bpf_map_get_next_key"))
+ goto err;
+
+ if (!ASSERT_OK(bpf_map_lookup_elem(map_fd, &key, &netcnt), "bpf_map_lookup_elem(netcnt)"))
+ goto err;
+
+ percpu_map_fd = bpf_map__fd(skel->maps.percpu_netcnt);
+ if (!ASSERT_OK(bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0]),
+ "bpf_map_lookup_elem(percpu_netcnt)"))
+ goto err;
+
+ /* Some packets can be still in per-cpu cache, but not more than
+ * MAX_PERCPU_PACKETS.
+ */
+ packets = netcnt.packets;
+ bytes = netcnt.bytes;
+ for (cpu = 0; cpu < nproc; cpu++) {
+ ASSERT_LE(percpu_netcnt[cpu].packets, MAX_PERCPU_PACKETS, "MAX_PERCPU_PACKETS");
+
+ packets += percpu_netcnt[cpu].packets;
+ bytes += percpu_netcnt[cpu].bytes;
+ }
+
+ /* No packets should be lost */
+ ASSERT_EQ(packets, 10000, "packets");
+
+ /* Let's check that bytes counter matches the number of packets
+ * multiplied by the size of ipv6 ICMP packet.
+ */
+ ASSERT_EQ(bytes, packets * 104, "bytes");
+
+err:
+ if (cg_fd != -1)
+ close(cg_fd);
+ free(percpu_netcnt);
+ netcnt_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
new file mode 100644
index 000000000000..71d8f3ba7d6b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "netns_cookie_prog.skel.h"
+#include "network_helpers.h"
+
+#ifndef SO_NETNS_COOKIE
+#define SO_NETNS_COOKIE 71
+#endif
+
+static int duration;
+
+void test_netns_cookie(void)
+{
+ int server_fd = -1, client_fd = -1, cgroup_fd = -1;
+ int err, val, ret, map, verdict;
+ struct netns_cookie_prog *skel;
+ uint64_t cookie_expected_value;
+ socklen_t vallen = sizeof(cookie_expected_value);
+ static const char send_msg[] = "message";
+
+ skel = netns_cookie_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ cgroup_fd = test__join_cgroup("/netns_cookie");
+ if (CHECK(cgroup_fd < 0, "join_cgroup", "cgroup creation failed\n"))
+ goto done;
+
+ skel->links.get_netns_cookie_sockops = bpf_program__attach_cgroup(
+ skel->progs.get_netns_cookie_sockops, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach"))
+ goto done;
+
+ verdict = bpf_program__fd(skel->progs.get_netns_cookie_sk_msg);
+ map = bpf_map__fd(skel->maps.sock_map);
+ err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "prog_attach"))
+ goto done;
+
+ server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno))
+ goto done;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (CHECK(client_fd < 0, "connect_to_fd", "errno %d\n", errno))
+ goto done;
+
+ ret = send(client_fd, send_msg, sizeof(send_msg), 0);
+ if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n", ret))
+ goto done;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sockops_netns_cookies),
+ &client_fd, &val);
+ if (!ASSERT_OK(err, "map_lookup(sockops_netns_cookies)"))
+ goto done;
+
+ err = getsockopt(client_fd, SOL_SOCKET, SO_NETNS_COOKIE,
+ &cookie_expected_value, &vallen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto done;
+
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies),
+ &client_fd, &val);
+ if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)"))
+ goto done;
+
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+
+done:
+ if (server_fd != -1)
+ close(server_fd);
+ if (client_fd != -1)
+ close(client_fd);
+ if (cgroup_fd != -1)
+ close(cgroup_fd);
+ netns_cookie_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c
new file mode 100644
index 000000000000..b1abd0c46607
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <test_progs.h>
+#include "test_perf_link.skel.h"
+
+static void burn_cpu(void)
+{
+ volatile int j = 0;
+ cpu_set_t cpu_set;
+ int i, err;
+
+ /* generate some branches on cpu 0 */
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ ASSERT_OK(err, "set_thread_affinity");
+
+ /* spin the loop for a while (random high number) */
+ for (i = 0; i < 1000000; ++i)
+ ++j;
+}
+
+void test_perf_link(void)
+{
+ struct test_perf_link *skel = NULL;
+ struct perf_event_attr attr;
+ int pfd = -1, link_fd = -1, err;
+ int run_cnt_before, run_cnt_after;
+ struct bpf_link_info info;
+ __u32 info_len = sizeof(info);
+
+ /* create perf event */
+ memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 4000;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (!ASSERT_GE(pfd, 0, "perf_fd"))
+ goto cleanup;
+
+ skel = test_perf_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ link_fd = bpf_link_create(bpf_program__fd(skel->progs.handler), pfd,
+ BPF_PERF_EVENT, NULL);
+ if (!ASSERT_GE(link_fd, 0, "link_fd"))
+ goto cleanup;
+
+ memset(&info, 0, sizeof(info));
+ err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len);
+ if (!ASSERT_OK(err, "link_get_info"))
+ goto cleanup;
+
+ ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "link_type");
+ ASSERT_GT(info.id, 0, "link_id");
+ ASSERT_GT(info.prog_id, 0, "link_prog_id");
+
+ /* ensure we get at least one perf_event prog execution */
+ burn_cpu();
+ ASSERT_GT(skel->bss->run_cnt, 0, "run_cnt");
+
+ /* perf_event is still active, but we close link and BPF program
+ * shouldn't be executed anymore
+ */
+ close(link_fd);
+ link_fd = -1;
+
+ /* make sure there are no stragglers */
+ kern_sync_rcu();
+
+ run_cnt_before = skel->bss->run_cnt;
+ burn_cpu();
+ run_cnt_after = skel->bss->run_cnt;
+
+ ASSERT_EQ(run_cnt_before, run_cnt_after, "run_cnt_before_after");
+
+cleanup:
+ if (link_fd >= 0)
+ close(link_fd);
+ if (pfd >= 0)
+ close(pfd);
+ test_perf_link__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c
index fcf54b3a1dd0..d4b953ae3407 100644
--- a/tools/testing/selftests/bpf/prog_tests/pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/pinning.c
@@ -125,6 +125,10 @@ void test_pinning(void)
if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
goto out;
+ /* get pinning path */
+ if (!ASSERT_STREQ(bpf_map__pin_path(map), pinpath, "get pin path"))
+ goto out;
+
/* set pinning path of other map and re-pin all */
map = bpf_object__find_map_by_name(obj, "nopinmap");
if (CHECK(!map, "find map", "NULL map"))
@@ -134,6 +138,11 @@ void test_pinning(void)
if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
goto out;
+ /* get pinning path after set */
+ if (!ASSERT_STREQ(bpf_map__pin_path(map), custpinpath,
+ "get pin path after set"))
+ goto out;
+
/* should only pin the one unpinned map */
err = bpf_object__pin_maps(obj, NULL);
if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index de2688166696..4e91f4d6466c 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -34,8 +34,8 @@ void test_reference_tracking(void)
if (!test__start_subtest(title))
continue;
- /* Expect verifier failure if test name has 'fail' */
- if (strstr(title, "fail") != NULL) {
+ /* Expect verifier failure if test name has 'err' */
+ if (strstr(title, "err_") != NULL) {
libbpf_print_fn_t old_print_fn;
old_print_fn = libbpf_set_print(NULL);
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 023cc532992d..776916b61c40 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <sys/time.h>
+#include <sys/resource.h>
#include "test_send_signal_kern.skel.h"
int sigusr1_received = 0;
@@ -10,29 +12,25 @@ static void sigusr1_handler(int signum)
}
static void test_send_signal_common(struct perf_event_attr *attr,
- bool signal_thread,
- const char *test_name)
+ bool signal_thread)
{
struct test_send_signal_kern *skel;
int pipe_c2p[2], pipe_p2c[2];
int err = -1, pmu_fd = -1;
- __u32 duration = 0;
char buf[256];
pid_t pid;
- if (CHECK(pipe(pipe_c2p), test_name,
- "pipe pipe_c2p error: %s\n", strerror(errno)))
+ if (!ASSERT_OK(pipe(pipe_c2p), "pipe_c2p"))
return;
- if (CHECK(pipe(pipe_p2c), test_name,
- "pipe pipe_p2c error: %s\n", strerror(errno))) {
+ if (!ASSERT_OK(pipe(pipe_p2c), "pipe_p2c")) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
return;
}
pid = fork();
- if (CHECK(pid < 0, test_name, "fork error: %s\n", strerror(errno))) {
+ if (!ASSERT_GE(pid, 0, "fork")) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
close(pipe_p2c[0]);
@@ -41,26 +39,40 @@ static void test_send_signal_common(struct perf_event_attr *attr,
}
if (pid == 0) {
+ int old_prio;
+
/* install signal handler and notify parent */
signal(SIGUSR1, sigusr1_handler);
close(pipe_c2p[0]); /* close read */
close(pipe_p2c[1]); /* close write */
+ /* boost with a high priority so we got a higher chance
+ * that if an interrupt happens, the underlying task
+ * is this process.
+ */
+ errno = 0;
+ old_prio = getpriority(PRIO_PROCESS, 0);
+ ASSERT_OK(errno, "getpriority");
+ ASSERT_OK(setpriority(PRIO_PROCESS, 0, -20), "setpriority");
+
/* notify parent signal handler is installed */
- CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
/* make sure parent enabled bpf program to send_signal */
- CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+ ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
/* wait a little for signal handler */
sleep(1);
buf[0] = sigusr1_received ? '2' : '0';
- CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
/* wait for parent notification and exit */
- CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+ ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
+
+ /* restore the old priority */
+ ASSERT_OK(setpriority(PRIO_PROCESS, 0, old_prio), "setpriority");
close(pipe_c2p[1]);
close(pipe_p2c[0]);
@@ -71,20 +83,19 @@ static void test_send_signal_common(struct perf_event_attr *attr,
close(pipe_p2c[0]); /* close read */
skel = test_send_signal_kern__open_and_load();
- if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
goto skel_open_load_failure;
if (!attr) {
err = test_send_signal_kern__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed\n")) {
+ if (!ASSERT_OK(err, "skel_attach")) {
err = -1;
goto destroy_skel;
}
} else {
pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
-1 /* group id */, 0 /* flags */);
- if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
- strerror(errno))) {
+ if (!ASSERT_GE(pmu_fd, 0, "perf_event_open")) {
err = -1;
goto destroy_skel;
}
@@ -96,7 +107,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
}
/* wait until child signal handler installed */
- CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+ ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read");
/* trigger the bpf send_signal */
skel->bss->pid = pid;
@@ -104,21 +115,21 @@ static void test_send_signal_common(struct perf_event_attr *attr,
skel->bss->signal_thread = signal_thread;
/* notify child that bpf program can send_signal now */
- CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
/* wait for result */
err = read(pipe_c2p[0], buf, 1);
- if (CHECK(err < 0, test_name, "reading pipe error: %s\n", strerror(errno)))
+ if (!ASSERT_GE(err, 0, "reading pipe"))
goto disable_pmu;
- if (CHECK(err == 0, test_name, "reading pipe error: size 0\n")) {
+ if (!ASSERT_GT(err, 0, "reading pipe error: size 0")) {
err = -1;
goto disable_pmu;
}
- CHECK(buf[0] != '2', test_name, "incorrect result\n");
+ ASSERT_EQ(buf[0], '2', "incorrect result");
/* notify child safe to exit */
- CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
disable_pmu:
close(pmu_fd);
@@ -132,7 +143,7 @@ skel_open_load_failure:
static void test_send_signal_tracepoint(bool signal_thread)
{
- test_send_signal_common(NULL, signal_thread, "tracepoint");
+ test_send_signal_common(NULL, signal_thread);
}
static void test_send_signal_perf(bool signal_thread)
@@ -143,7 +154,7 @@ static void test_send_signal_perf(bool signal_thread)
.config = PERF_COUNT_SW_CPU_CLOCK,
};
- test_send_signal_common(&attr, signal_thread, "perf_sw_event");
+ test_send_signal_common(&attr, signal_thread);
}
static void test_send_signal_nmi(bool signal_thread)
@@ -172,7 +183,7 @@ static void test_send_signal_nmi(bool signal_thread)
close(pmu_fd);
}
- test_send_signal_common(&attr, signal_thread, "perf_hw_event");
+ test_send_signal_common(&attr, signal_thread);
}
void test_send_signal(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
index dffbcaa1ec98..8fd1b4b29a0e 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -19,7 +19,7 @@
#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
-#define EXP_STR_OUT "str1 longstr"
+#define EXP_STR_OUT "str1 a b c d e longstr"
#define EXP_STR_RET sizeof(EXP_STR_OUT)
#define EXP_OVER_OUT "%over"
@@ -114,6 +114,8 @@ void test_snprintf_negative(void)
ASSERT_ERR(load_single_snprintf("%"), "invalid specifier 3");
ASSERT_ERR(load_single_snprintf("%12345678"), "invalid specifier 4");
ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
+ ASSERT_ERR(load_single_snprintf("%lc"), "invalid specifier 6");
+ ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7");
ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 515229f24a93..5c5979046523 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -351,9 +351,11 @@ static void test_insert_opened(int family, int sotype, int mapfd)
errno = 0;
value = s;
err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
- if (!err || errno != EOPNOTSUPP)
- FAIL_ERRNO("map_update: expected EOPNOTSUPP");
-
+ if (sotype == SOCK_STREAM) {
+ if (!err || errno != EOPNOTSUPP)
+ FAIL_ERRNO("map_update: expected EOPNOTSUPP");
+ } else if (err)
+ FAIL_ERRNO("map_update: expected success");
xclose(s);
}
@@ -919,6 +921,23 @@ static const char *redir_mode_str(enum redir_mode mode)
}
}
+static int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
+{
+ u64 value;
+ u32 key;
+ int err;
+
+ key = 0;
+ value = fd1;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ return err;
+
+ key = 1;
+ value = fd2;
+ return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+}
+
static void redir_to_connected(int family, int sotype, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
@@ -928,9 +947,9 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
unsigned int pass;
socklen_t len;
int err, n;
- u64 value;
u32 key;
char b;
+ int retries = 100;
zero_verdict_count(verd_mapfd);
@@ -965,15 +984,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (p1 < 0)
goto close_cli1;
- key = 0;
- value = p0;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- goto close_peer1;
-
- key = 1;
- value = p1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
goto close_peer1;
@@ -991,10 +1002,15 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
goto close_peer1;
if (pass != 1)
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
+again:
n = read(c0, &b, 1);
- if (n < 0)
+ if (n < 0) {
+ if (errno == EAGAIN && retries--) {
+ usleep(1000);
+ goto again;
+ }
FAIL_ERRNO("%s: read", log_prefix);
+ }
if (n == 0)
FAIL("%s: incomplete read", log_prefix);
@@ -1061,7 +1077,6 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
int s, c, p, err, n;
unsigned int drop;
socklen_t len;
- u64 value;
u32 key;
zero_verdict_count(verd_mapfd);
@@ -1086,15 +1101,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
if (p < 0)
goto close_cli;
- key = 0;
- value = s;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- goto close_peer;
-
- key = 1;
- value = p;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_mapfd, s, p);
if (err)
goto close_peer;
@@ -1346,7 +1353,6 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
int s1, s2, c, err;
unsigned int drop;
socklen_t len;
- u64 value;
u32 key;
zero_verdict_count(verd_map);
@@ -1360,16 +1366,10 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
if (s2 < 0)
goto close_srv1;
- key = 0;
- value = s1;
- err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_map, s1, s2);
if (err)
goto close_srv2;
- key = 1;
- value = s2;
- err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
-
/* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
len = sizeof(addr);
err = xgetsockname(s2, sockaddr(&addr), &len);
@@ -1441,6 +1441,8 @@ static const char *family_str(sa_family_t family)
return "IPv4";
case AF_INET6:
return "IPv6";
+ case AF_UNIX:
+ return "Unix";
default:
return "unknown";
}
@@ -1563,6 +1565,101 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
}
}
+static void unix_redir_to_connected(int sotype, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int retries = 100;
+ int err, n;
+ int sfd[2];
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+ return;
+ c0 = sfd[0], p0 = sfd[1];
+
+ if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+ goto close0;
+ c1 = sfd[0], p1 = sfd[1];
+
+ err = add_to_sockmap(sock_mapfd, p0, p1);
+ if (err)
+ goto close;
+
+ n = write(c1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close;
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto close;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+again:
+ n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+ if (n < 0) {
+ if (errno == EAGAIN && retries--) {
+ usleep(1000);
+ goto again;
+ }
+ FAIL_ERRNO("%s: read", log_prefix);
+ }
+ if (n == 0)
+ FAIL("%s: incomplete read", log_prefix);
+
+close:
+ xclose(c1);
+ xclose(p1);
+close0:
+ xclose(c0);
+ xclose(p0);
+}
+
+static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int sotype)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(AF_UNIX);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+ unix_skb_redir_to_connected(skel, map, sotype);
+}
+
static void test_reuseport(struct test_sockmap_listen *skel,
struct bpf_map *map, int family, int sotype)
{
@@ -1603,33 +1700,27 @@ static void test_reuseport(struct test_sockmap_listen *skel,
}
}
-static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
- int verd_mapfd, enum redir_mode mode)
+static int inet_socketpair(int family, int type, int *s, int *c)
{
- const char *log_prefix = redir_mode_str(mode);
struct sockaddr_storage addr;
- int c0, c1, p0, p1;
- unsigned int pass;
- int retries = 100;
socklen_t len;
- int err, n;
- u64 value;
- u32 key;
- char b;
-
- zero_verdict_count(verd_mapfd);
+ int p0, c0;
+ int err;
- p0 = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ p0 = socket_loopback(family, type | SOCK_NONBLOCK);
if (p0 < 0)
- return;
+ return p0;
+
len = sizeof(addr);
err = xgetsockname(p0, sockaddr(&addr), &len);
if (err)
goto close_peer0;
- c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
- if (c0 < 0)
+ c0 = xsocket(family, type | SOCK_NONBLOCK, 0);
+ if (c0 < 0) {
+ err = c0;
goto close_peer0;
+ }
err = xconnect(c0, sockaddr(&addr), len);
if (err)
goto close_cli0;
@@ -1640,35 +1731,133 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
if (err)
goto close_cli0;
- p1 = socket_loopback(family, sotype | SOCK_NONBLOCK);
- if (p1 < 0)
- goto close_cli0;
- err = xgetsockname(p1, sockaddr(&addr), &len);
+ *s = p0;
+ *c = c0;
+ return 0;
+
+close_cli0:
+ xclose(c0);
+close_peer0:
+ xclose(p0);
+ return err;
+}
+
+static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
+ enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int retries = 100;
+ int err, n;
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
+ if (err)
+ return;
+ err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
if (err)
goto close_cli0;
- c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
- if (c1 < 0)
- goto close_peer1;
- err = xconnect(c1, sockaddr(&addr), len);
+ err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
goto close_cli1;
- err = xgetsockname(c1, sockaddr(&addr), &len);
- if (err)
+
+ n = write(c1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
goto close_cli1;
- err = xconnect(p1, sockaddr(&addr), len);
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
if (err)
goto close_cli1;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
- key = 0;
- value = p0;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+again:
+ n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+ if (n < 0) {
+ if (errno == EAGAIN && retries--) {
+ usleep(1000);
+ goto again;
+ }
+ FAIL_ERRNO("%s: read", log_prefix);
+ }
+ if (n == 0)
+ FAIL("%s: incomplete read", log_prefix);
+
+close_cli1:
+ xclose(c1);
+ xclose(p1);
+close_cli0:
+ xclose(c0);
+ xclose(p0);
+}
+
+static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
if (err)
- goto close_cli1;
+ return;
- key = 1;
- value = p1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ skel->bss->test_ingress = false;
+ udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(family);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+ udp_skb_redir_to_connected(skel, map, family);
+}
+
+static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int retries = 100;
+ int err, n;
+ int sfd[2];
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+ return;
+ c0 = sfd[0], p0 = sfd[1];
+
+ err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
+ if (err)
+ goto close;
+
+ err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
goto close_cli1;
@@ -1690,8 +1879,10 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
again:
n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
if (n < 0) {
- if (errno == EAGAIN && retries--)
+ if (errno == EAGAIN && retries--) {
+ usleep(1000);
goto again;
+ }
FAIL_ERRNO("%s: read", log_prefix);
}
if (n == 0)
@@ -1699,16 +1890,102 @@ again:
close_cli1:
xclose(c1);
-close_peer1:
+ xclose(p1);
+close:
+ xclose(c0);
+ xclose(p0);
+}
+
+static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+ inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int err, n;
+ int sfd[2];
+ u32 key;
+ char b;
+ int retries = 100;
+
+ zero_verdict_count(verd_mapfd);
+
+ err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
+ if (err)
+ return;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+ goto close_cli0;
+ c1 = sfd[0], p1 = sfd[1];
+
+ err = add_to_sockmap(sock_mapfd, p0, p1);
+ if (err)
+ goto close;
+
+ n = write(c1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close;
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto close;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+again:
+ n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+ if (n < 0) {
+ if (errno == EAGAIN && retries--) {
+ usleep(1000);
+ goto again;
+ }
+ FAIL_ERRNO("%s: read", log_prefix);
+ }
+ if (n == 0)
+ FAIL("%s: incomplete read", log_prefix);
+
+close:
+ xclose(c1);
xclose(p1);
close_cli0:
xclose(c0);
-close_peer0:
xclose(p0);
+
}
-static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
+static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
{
int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
@@ -1720,17 +1997,21 @@ static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
return;
skel->bss->test_ingress = false;
- udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_EGRESS);
+ unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_EGRESS);
skel->bss->test_ingress = true;
- udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_INGRESS);
+ unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+ unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_INGRESS);
xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
}
-static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int family)
+static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family)
{
const char *family_name, *map_name;
char s[MAX_TEST_NAME];
@@ -1740,7 +2021,8 @@ static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map
snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
if (!test__start_subtest(s))
return;
- udp_skb_redir_to_connected(skel, map, family);
+ inet_unix_skb_redir_to_connected(skel, map, family);
+ unix_inet_skb_redir_to_connected(skel, map, family);
}
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
@@ -1752,6 +2034,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
test_reuseport(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_DGRAM);
test_udp_redir(skel, map, family);
+ test_udp_unix_redir(skel, map, family);
}
void test_sockmap_listen(void)
@@ -1767,10 +2050,14 @@ void test_sockmap_listen(void)
skel->bss->test_sockmap = true;
run_tests(skel, skel->maps.sock_map, AF_INET);
run_tests(skel, skel->maps.sock_map, AF_INET6);
+ test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
+ test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
skel->bss->test_sockmap = false;
run_tests(skel, skel->maps.sock_hash, AF_INET);
run_tests(skel, skel->maps.sock_hash, AF_INET6);
+ test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
+ test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
test_sockmap_listen__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index ec281b0363b8..86f97681ad89 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -195,8 +195,10 @@ static void run_test(int cgroup_fd)
pthread_mutex_lock(&server_started_mtx);
if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
- (void *)&server_fd)))
+ (void *)&server_fd))) {
+ pthread_mutex_unlock(&server_started_mtx);
goto close_server_fd;
+ }
pthread_cond_wait(&server_started, &server_started_mtx);
pthread_mutex_unlock(&server_started_mtx);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
new file mode 100644
index 000000000000..6b53b3cb8dad
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <netinet/tcp.h>
+#include "sockopt_qos_to_cc.skel.h"
+
+static void run_setsockopt_test(int cg_fd, int sock_fd)
+{
+ socklen_t optlen;
+ char cc[16]; /* TCP_CA_NAME_MAX */
+ int buf;
+ int err = -1;
+
+ buf = 0x2D;
+ err = setsockopt(sock_fd, SOL_IPV6, IPV6_TCLASS, &buf, sizeof(buf));
+ if (!ASSERT_OK(err, "setsockopt(sock_fd, IPV6_TCLASS)"))
+ return;
+
+ /* Verify the setsockopt cc change */
+ optlen = sizeof(cc);
+ err = getsockopt(sock_fd, SOL_TCP, TCP_CONGESTION, cc, &optlen);
+ if (!ASSERT_OK(err, "getsockopt(sock_fd, TCP_CONGESTION)"))
+ return;
+
+ if (!ASSERT_STREQ(cc, "reno", "getsockopt(sock_fd, TCP_CONGESTION)"))
+ return;
+}
+
+void test_sockopt_qos_to_cc(void)
+{
+ struct sockopt_qos_to_cc *skel;
+ char cc_cubic[16] = "cubic"; /* TCP_CA_NAME_MAX */
+ int cg_fd = -1;
+ int sock_fd = -1;
+ int err;
+
+ cg_fd = test__join_cgroup("/sockopt_qos_to_cc");
+ if (!ASSERT_GE(cg_fd, 0, "cg-join(sockopt_qos_to_cc)"))
+ return;
+
+ skel = sockopt_qos_to_cc__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto done;
+
+ sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "v6 socket open"))
+ goto done;
+
+ err = setsockopt(sock_fd, SOL_TCP, TCP_CONGESTION, &cc_cubic,
+ sizeof(cc_cubic));
+ if (!ASSERT_OK(err, "setsockopt(sock_fd, TCP_CONGESTION)"))
+ goto done;
+
+ skel->links.sockopt_qos_to_cc =
+ bpf_program__attach_cgroup(skel->progs.sockopt_qos_to_cc,
+ cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.sockopt_qos_to_cc,
+ "prog_attach(sockopt_qos_to_cc)"))
+ goto done;
+
+ run_setsockopt_test(cg_fd, sock_fd);
+
+done:
+ if (sock_fd != -1)
+ close(sock_fd);
+ if (cg_fd != -1)
+ close(cg_fd);
+ /* destroy can take null and error pointer */
+ sockopt_qos_to_cc__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
new file mode 100644
index 000000000000..53f0e0fa1a53
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <linux/ptrace.h>
+#include "test_task_pt_regs.skel.h"
+
+void test_task_pt_regs(void)
+{
+ struct test_task_pt_regs *skel;
+ struct bpf_link *uprobe_link;
+ size_t uprobe_offset;
+ ssize_t base_addr;
+ bool match;
+
+ base_addr = get_base_addr();
+ if (!ASSERT_GT(base_addr, 0, "get_base_addr"))
+ return;
+ uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+
+ skel = test_task_pt_regs__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+ if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
+ goto cleanup;
+
+ uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
+ false /* retprobe */,
+ 0 /* self pid */,
+ "/proc/self/exe",
+ uprobe_offset);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
+ goto cleanup;
+ skel->links.handle_uprobe = uprobe_link;
+
+ /* trigger & validate uprobe */
+ get_base_addr();
+
+ if (!ASSERT_EQ(skel->bss->uprobe_res, 1, "check_uprobe_res"))
+ goto cleanup;
+
+ match = !memcmp(&skel->bss->current_regs, &skel->bss->ctx_regs,
+ sizeof(skel->bss->current_regs));
+ ASSERT_TRUE(match, "check_regs_match");
+
+cleanup:
+ test_task_pt_regs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 5703c918812b..e7201ba29ccd 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -13,15 +13,16 @@
#define _GNU_SOURCE
#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
#include <linux/limits.h>
#include <linux/sysctl.h>
-#include <linux/if_tun.h>
-#include <linux/if.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
-#include <sys/stat.h>
#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
#include "test_progs.h"
#include "network_helpers.h"
@@ -391,9 +392,7 @@ done:
static int test_ping(int family, const char *addr)
{
- const char *ping = family == AF_INET6 ? "ping6" : "ping";
-
- SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
+ SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
return 0;
fail:
return -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
new file mode 100644
index 000000000000..25f40e1b9967
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "timer.skel.h"
+
+static int timer(struct timer *timer_skel)
+{
+ int err, prog_fd;
+ __u32 duration = 0, retval;
+
+ err = timer__attach(timer_skel);
+ if (!ASSERT_OK(err, "timer_attach"))
+ return err;
+
+ ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1");
+ ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1");
+
+ prog_fd = bpf_program__fd(timer_skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+ NULL, NULL, &retval, &duration);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(retval, 0, "test_run");
+ timer__detach(timer_skel);
+
+ usleep(50); /* 10 usecs should be enough, but give it extra */
+ /* check that timer_cb1() was executed 10+10 times */
+ ASSERT_EQ(timer_skel->data->callback_check, 42, "callback_check2");
+ ASSERT_EQ(timer_skel->data->callback2_check, 42, "callback2_check2");
+
+ /* check that timer_cb2() was executed twice */
+ ASSERT_EQ(timer_skel->bss->bss_data, 10, "bss_data");
+
+ /* check that there were no errors in timer execution */
+ ASSERT_EQ(timer_skel->bss->err, 0, "err");
+
+ /* check that code paths completed */
+ ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok");
+
+ return 0;
+}
+
+void test_timer(void)
+{
+ struct timer *timer_skel = NULL;
+ int err;
+
+ timer_skel = timer__open_and_load();
+ if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
+ goto cleanup;
+
+ err = timer(timer_skel);
+ ASSERT_OK(err, "timer");
+cleanup:
+ timer__destroy(timer_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
new file mode 100644
index 000000000000..ced8f6cf347c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "timer_mim.skel.h"
+#include "timer_mim_reject.skel.h"
+
+static int timer_mim(struct timer_mim *timer_skel)
+{
+ __u32 duration = 0, retval;
+ __u64 cnt1, cnt2;
+ int err, prog_fd, key1 = 1;
+
+ err = timer_mim__attach(timer_skel);
+ if (!ASSERT_OK(err, "timer_attach"))
+ return err;
+
+ prog_fd = bpf_program__fd(timer_skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+ NULL, NULL, &retval, &duration);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(retval, 0, "test_run");
+ timer_mim__detach(timer_skel);
+
+ /* check that timer_cb[12] are incrementing 'cnt' */
+ cnt1 = READ_ONCE(timer_skel->bss->cnt);
+ for (int i = 0; i < 100; i++) {
+ cnt2 = READ_ONCE(timer_skel->bss->cnt);
+ if (cnt2 != cnt1)
+ break;
+ usleep(200); /* 100 times more than interval */
+ }
+ ASSERT_GT(cnt2, cnt1, "cnt");
+
+ ASSERT_EQ(timer_skel->bss->err, 0, "err");
+ /* check that code paths completed */
+ ASSERT_EQ(timer_skel->bss->ok, 1 | 2, "ok");
+
+ close(bpf_map__fd(timer_skel->maps.inner_htab));
+ err = bpf_map_delete_elem(bpf_map__fd(timer_skel->maps.outer_arr), &key1);
+ ASSERT_EQ(err, 0, "delete inner map");
+
+ /* check that timer_cb[12] are no longer running */
+ cnt1 = READ_ONCE(timer_skel->bss->cnt);
+ for (int i = 0; i < 100; i++) {
+ usleep(200); /* 100 times more than interval */
+ cnt2 = READ_ONCE(timer_skel->bss->cnt);
+ if (cnt2 == cnt1)
+ break;
+ }
+ ASSERT_EQ(cnt2, cnt1, "cnt");
+
+ return 0;
+}
+
+void test_timer_mim(void)
+{
+ struct timer_mim_reject *timer_reject_skel = NULL;
+ libbpf_print_fn_t old_print_fn = NULL;
+ struct timer_mim *timer_skel = NULL;
+ int err;
+
+ old_print_fn = libbpf_set_print(NULL);
+ timer_reject_skel = timer_mim_reject__open_and_load();
+ libbpf_set_print(old_print_fn);
+ if (!ASSERT_ERR_PTR(timer_reject_skel, "timer_reject_skel_load"))
+ goto cleanup;
+
+ timer_skel = timer_mim__open_and_load();
+ if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
+ goto cleanup;
+
+ err = timer_mim(timer_skel);
+ ASSERT_OK(err, "timer_mim");
+cleanup:
+ timer_mim__destroy(timer_skel);
+ timer_mim_reject__destroy(timer_reject_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
new file mode 100644
index 000000000000..370d220288a6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/**
+ * Test XDP bonding support
+ *
+ * Sets up two bonded veth pairs between two fresh namespaces
+ * and verifies that XDP_TX program loaded on a bond device
+ * are correctly loaded onto the slave devices and XDP_TX'd
+ * packets are balanced using bonding.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <net/if.h>
+#include <linux/if_link.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include <linux/if_bonding.h>
+#include <linux/limits.h>
+#include <linux/udp.h>
+
+#include "xdp_dummy.skel.h"
+#include "xdp_redirect_multi_kern.skel.h"
+#include "xdp_tx.skel.h"
+
+#define BOND1_MAC {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}
+#define BOND1_MAC_STR "00:11:22:33:44:55"
+#define BOND2_MAC {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}
+#define BOND2_MAC_STR "00:22:33:44:55:66"
+#define NPACKETS 100
+
+static int root_netns_fd = -1;
+
+static void restore_root_netns(void)
+{
+ ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "restore_root_netns");
+}
+
+static int setns_by_name(char *name)
+{
+ int nsfd, err;
+ char nspath[PATH_MAX];
+
+ snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+ nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+ if (nsfd < 0)
+ return -1;
+
+ err = setns(nsfd, CLONE_NEWNET);
+ close(nsfd);
+ return err;
+}
+
+static int get_rx_packets(const char *iface)
+{
+ FILE *f;
+ char line[512];
+ int iface_len = strlen(iface);
+
+ f = fopen("/proc/net/dev", "r");
+ if (!f)
+ return -1;
+
+ while (fgets(line, sizeof(line), f)) {
+ char *p = line;
+
+ while (*p == ' ')
+ p++; /* skip whitespace */
+ if (!strncmp(p, iface, iface_len)) {
+ p += iface_len;
+ if (*p++ != ':')
+ continue;
+ while (*p == ' ')
+ p++; /* skip whitespace */
+ while (*p && *p != ' ')
+ p++; /* skip rx bytes */
+ while (*p == ' ')
+ p++; /* skip whitespace */
+ fclose(f);
+ return atoi(p);
+ }
+ }
+ fclose(f);
+ return -1;
+}
+
+#define MAX_BPF_LINKS 8
+
+struct skeletons {
+ struct xdp_dummy *xdp_dummy;
+ struct xdp_tx *xdp_tx;
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+
+ int nlinks;
+ struct bpf_link *links[MAX_BPF_LINKS];
+};
+
+static int xdp_attach(struct skeletons *skeletons, struct bpf_program *prog, char *iface)
+{
+ struct bpf_link *link;
+ int ifindex;
+
+ ifindex = if_nametoindex(iface);
+ if (!ASSERT_GT(ifindex, 0, "get ifindex"))
+ return -1;
+
+ if (!ASSERT_LE(skeletons->nlinks+1, MAX_BPF_LINKS, "too many XDP programs attached"))
+ return -1;
+
+ link = bpf_program__attach_xdp(prog, ifindex);
+ if (!ASSERT_OK_PTR(link, "attach xdp program"))
+ return -1;
+
+ skeletons->links[skeletons->nlinks++] = link;
+ return 0;
+}
+
+enum {
+ BOND_ONE_NO_ATTACH = 0,
+ BOND_BOTH_AND_ATTACH,
+};
+
+static const char * const mode_names[] = {
+ [BOND_MODE_ROUNDROBIN] = "balance-rr",
+ [BOND_MODE_ACTIVEBACKUP] = "active-backup",
+ [BOND_MODE_XOR] = "balance-xor",
+ [BOND_MODE_BROADCAST] = "broadcast",
+ [BOND_MODE_8023AD] = "802.3ad",
+ [BOND_MODE_TLB] = "balance-tlb",
+ [BOND_MODE_ALB] = "balance-alb",
+};
+
+static const char * const xmit_policy_names[] = {
+ [BOND_XMIT_POLICY_LAYER2] = "layer2",
+ [BOND_XMIT_POLICY_LAYER34] = "layer3+4",
+ [BOND_XMIT_POLICY_LAYER23] = "layer2+3",
+ [BOND_XMIT_POLICY_ENCAP23] = "encap2+3",
+ [BOND_XMIT_POLICY_ENCAP34] = "encap3+4",
+};
+
+static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
+ int bond_both_attach)
+{
+#define SYS(fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_OK(system(cmd), cmd)) \
+ return -1; \
+ })
+
+ SYS("ip netns add ns_dst");
+ SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst");
+ SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst");
+
+ SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s",
+ mode_names[mode], xmit_policy_names[xmit_policy]);
+ SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none");
+ SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s",
+ mode_names[mode], xmit_policy_names[xmit_policy]);
+ SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none");
+
+ SYS("ip link set veth1_1 master bond1");
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+ SYS("ip link set veth1_2 master bond1");
+ } else {
+ SYS("ip link set veth1_2 up addrgenmode none");
+
+ if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2"))
+ return -1;
+ }
+
+ SYS("ip -netns ns_dst link set veth2_1 master bond2");
+
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH)
+ SYS("ip -netns ns_dst link set veth2_2 master bond2");
+ else
+ SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none");
+
+ /* Load a dummy program on sending side as with veth peer needs to have a
+ * XDP program loaded as well.
+ */
+ if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "bond1"))
+ return -1;
+
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+ if (!ASSERT_OK(setns_by_name("ns_dst"), "set netns to ns_dst"))
+ return -1;
+
+ if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond2"))
+ return -1;
+
+ restore_root_netns();
+ }
+
+ return 0;
+
+#undef SYS
+}
+
+static void bonding_cleanup(struct skeletons *skeletons)
+{
+ restore_root_netns();
+ while (skeletons->nlinks) {
+ skeletons->nlinks--;
+ bpf_link__destroy(skeletons->links[skeletons->nlinks]);
+ }
+ ASSERT_OK(system("ip link delete bond1"), "delete bond1");
+ ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1");
+ ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2");
+ ASSERT_OK(system("ip netns delete ns_dst"), "delete ns_dst");
+}
+
+static int send_udp_packets(int vary_dst_ip)
+{
+ struct ethhdr eh = {
+ .h_source = BOND1_MAC,
+ .h_dest = BOND2_MAC,
+ .h_proto = htons(ETH_P_IP),
+ };
+ uint8_t buf[128] = {};
+ struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh));
+ struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph));
+ int i, s = -1;
+ int ifindex;
+
+ s = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (!ASSERT_GE(s, 0, "socket"))
+ goto err;
+
+ ifindex = if_nametoindex("bond1");
+ if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex"))
+ goto err;
+
+ memcpy(buf, &eh, sizeof(eh));
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->tos = 16;
+ iph->id = 1;
+ iph->ttl = 64;
+ iph->protocol = IPPROTO_UDP;
+ iph->saddr = 1;
+ iph->daddr = 2;
+ iph->tot_len = htons(sizeof(buf) - ETH_HLEN);
+ iph->check = 0;
+
+ for (i = 1; i <= NPACKETS; i++) {
+ int n;
+ struct sockaddr_ll saddr_ll = {
+ .sll_ifindex = ifindex,
+ .sll_halen = ETH_ALEN,
+ .sll_addr = BOND2_MAC,
+ };
+
+ /* vary the UDP destination port for even distribution with roundrobin/xor modes */
+ uh->dest++;
+
+ if (vary_dst_ip)
+ iph->daddr++;
+
+ n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll));
+ if (!ASSERT_EQ(n, sizeof(buf), "sendto"))
+ goto err;
+ }
+
+ return 0;
+
+err:
+ if (s >= 0)
+ close(s);
+ return -1;
+}
+
+static void test_xdp_bonding_with_mode(struct skeletons *skeletons, int mode, int xmit_policy)
+{
+ int bond1_rx;
+
+ if (bonding_setup(skeletons, mode, xmit_policy, BOND_BOTH_AND_ATTACH))
+ goto out;
+
+ if (send_udp_packets(xmit_policy != BOND_XMIT_POLICY_LAYER34))
+ goto out;
+
+ bond1_rx = get_rx_packets("bond1");
+ ASSERT_EQ(bond1_rx, NPACKETS, "expected more received packets");
+
+ switch (mode) {
+ case BOND_MODE_ROUNDROBIN:
+ case BOND_MODE_XOR: {
+ int veth1_rx = get_rx_packets("veth1_1");
+ int veth2_rx = get_rx_packets("veth1_2");
+ int diff = abs(veth1_rx - veth2_rx);
+
+ ASSERT_GE(veth1_rx + veth2_rx, NPACKETS, "expected more packets");
+
+ switch (xmit_policy) {
+ case BOND_XMIT_POLICY_LAYER2:
+ ASSERT_GE(diff, NPACKETS,
+ "expected packets on only one of the interfaces");
+ break;
+ case BOND_XMIT_POLICY_LAYER23:
+ case BOND_XMIT_POLICY_LAYER34:
+ ASSERT_LT(diff, NPACKETS/2,
+ "expected even distribution of packets");
+ break;
+ default:
+ PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+ break;
+ }
+ break;
+ }
+ case BOND_MODE_ACTIVEBACKUP: {
+ int veth1_rx = get_rx_packets("veth1_1");
+ int veth2_rx = get_rx_packets("veth1_2");
+ int diff = abs(veth1_rx - veth2_rx);
+
+ ASSERT_GE(diff, NPACKETS,
+ "expected packets on only one of the interfaces");
+ break;
+ }
+ default:
+ PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+ break;
+ }
+
+out:
+ bonding_cleanup(skeletons);
+}
+
+/* Test the broadcast redirection using xdp_redirect_map_multi_prog and adding
+ * all the interfaces to it and checking that broadcasting won't send the packet
+ * to neither the ingress bond device (bond2) or its slave (veth2_1).
+ */
+static void test_xdp_bonding_redirect_multi(struct skeletons *skeletons)
+{
+ static const char * const ifaces[] = {"bond2", "veth2_1", "veth2_2"};
+ int veth1_1_rx, veth1_2_rx;
+ int err;
+
+ if (bonding_setup(skeletons, BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23,
+ BOND_ONE_NO_ATTACH))
+ goto out;
+
+
+ if (!ASSERT_OK(setns_by_name("ns_dst"), "could not set netns to ns_dst"))
+ goto out;
+
+ /* populate the devmap with the relevant interfaces */
+ for (int i = 0; i < ARRAY_SIZE(ifaces); i++) {
+ int ifindex = if_nametoindex(ifaces[i]);
+ int map_fd = bpf_map__fd(skeletons->xdp_redirect_multi_kern->maps.map_all);
+
+ if (!ASSERT_GT(ifindex, 0, "could not get interface index"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &ifindex, &ifindex, 0);
+ if (!ASSERT_OK(err, "add interface to map_all"))
+ goto out;
+ }
+
+ if (xdp_attach(skeletons,
+ skeletons->xdp_redirect_multi_kern->progs.xdp_redirect_map_multi_prog,
+ "bond2"))
+ goto out;
+
+ restore_root_netns();
+
+ if (send_udp_packets(BOND_MODE_ROUNDROBIN))
+ goto out;
+
+ veth1_1_rx = get_rx_packets("veth1_1");
+ veth1_2_rx = get_rx_packets("veth1_2");
+
+ ASSERT_EQ(veth1_1_rx, 0, "expected no packets on veth1_1");
+ ASSERT_GE(veth1_2_rx, NPACKETS, "expected packets on veth1_2");
+
+out:
+ restore_root_netns();
+ bonding_cleanup(skeletons);
+}
+
+/* Test that XDP programs cannot be attached to both the bond master and slaves simultaneously */
+static void test_xdp_bonding_attach(struct skeletons *skeletons)
+{
+ struct bpf_link *link = NULL;
+ struct bpf_link *link2 = NULL;
+ int veth, bond;
+ int err;
+
+ if (!ASSERT_OK(system("ip link add veth type veth"), "add veth"))
+ goto out;
+ if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+ goto out;
+
+ veth = if_nametoindex("veth");
+ if (!ASSERT_GE(veth, 0, "if_nametoindex veth"))
+ goto out;
+ bond = if_nametoindex("bond");
+ if (!ASSERT_GE(bond, 0, "if_nametoindex bond"))
+ goto out;
+
+ /* enslaving with a XDP program loaded fails */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+ if (!ASSERT_OK_PTR(link, "attach program to veth"))
+ goto out;
+
+ err = system("ip link set veth master bond");
+ if (!ASSERT_NEQ(err, 0, "attaching slave with xdp program expected to fail"))
+ goto out;
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ err = system("ip link set veth master bond");
+ if (!ASSERT_OK(err, "set veth master"))
+ goto out;
+
+ /* attaching to slave when master has no program is allowed */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+ if (!ASSERT_OK_PTR(link, "attach program to slave when enslaved"))
+ goto out;
+
+ /* attaching to master not allowed when slave has program loaded */
+ link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ if (!ASSERT_ERR_PTR(link2, "attach program to master when slave has program"))
+ goto out;
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ /* attaching XDP program to master allowed when slave has no program */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ if (!ASSERT_OK_PTR(link, "attach program to master"))
+ goto out;
+
+ /* attaching to slave not allowed when master has program loaded */
+ link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ ASSERT_ERR_PTR(link2, "attach program to slave when master has program");
+
+out:
+ bpf_link__destroy(link);
+ bpf_link__destroy(link2);
+
+ system("ip link del veth");
+ system("ip link del bond");
+}
+
+static int libbpf_debug_print(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ if (level != LIBBPF_WARN)
+ vprintf(format, args);
+ return 0;
+}
+
+struct bond_test_case {
+ char *name;
+ int mode;
+ int xmit_policy;
+};
+
+static struct bond_test_case bond_test_cases[] = {
+ { "xdp_bonding_roundrobin", BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, },
+ { "xdp_bonding_activebackup", BOND_MODE_ACTIVEBACKUP, BOND_XMIT_POLICY_LAYER23 },
+
+ { "xdp_bonding_xor_layer2", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER2, },
+ { "xdp_bonding_xor_layer23", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER23, },
+ { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, },
+};
+
+void test_xdp_bonding(void)
+{
+ libbpf_print_fn_t old_print_fn;
+ struct skeletons skeletons = {};
+ int i;
+
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+ root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+ goto out;
+
+ skeletons.xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load"))
+ goto out;
+
+ skeletons.xdp_tx = xdp_tx__open_and_load();
+ if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load"))
+ goto out;
+
+ skeletons.xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skeletons.xdp_redirect_multi_kern,
+ "xdp_redirect_multi_kern__open_and_load"))
+ goto out;
+
+ if (test__start_subtest("xdp_bonding_attach"))
+ test_xdp_bonding_attach(&skeletons);
+
+ for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) {
+ struct bond_test_case *test_case = &bond_test_cases[i];
+
+ if (test__start_subtest(test_case->name))
+ test_xdp_bonding_with_mode(
+ &skeletons,
+ test_case->mode,
+ test_case->xmit_policy);
+ }
+
+ if (test__start_subtest("xdp_bonding_redirect_multi"))
+ test_xdp_bonding_redirect_multi(&skeletons);
+
+out:
+ xdp_dummy__destroy(skeletons.xdp_dummy);
+ xdp_tx__destroy(skeletons.xdp_tx);
+ xdp_redirect_multi_kern__destroy(skeletons.xdp_redirect_multi_kern);
+
+ libbpf_set_print(old_print_fn);
+ if (root_netns_fd >= 0)
+ close(root_netns_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
new file mode 100644
index 000000000000..ab4952b9fb1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_xdp_context_test_run.skel.h"
+
+void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
+ __u32 data_meta, __u32 data, __u32 data_end,
+ __u32 ingress_ifindex, __u32 rx_queue_index,
+ __u32 egress_ifindex)
+{
+ struct xdp_md ctx = {
+ .data = data,
+ .data_end = data_end,
+ .data_meta = data_meta,
+ .ingress_ifindex = ingress_ifindex,
+ .rx_queue_index = rx_queue_index,
+ .egress_ifindex = egress_ifindex,
+ };
+ int err;
+
+ opts.ctx_in = &ctx;
+ opts.ctx_size_in = sizeof(ctx);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_EQ(errno, EINVAL, "errno-EINVAL");
+ ASSERT_ERR(err, "bpf_prog_test_run");
+}
+
+void test_xdp_context_test_run(void)
+{
+ struct test_xdp_context_test_run *skel = NULL;
+ char data[sizeof(pkt_v4) + sizeof(__u32)];
+ char bad_ctx[sizeof(struct xdp_md) + 1];
+ struct xdp_md ctx_in, ctx_out;
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .data_size_in = sizeof(data),
+ .ctx_out = &ctx_out,
+ .ctx_size_out = sizeof(ctx_out),
+ .repeat = 1,
+ );
+ int err, prog_fd;
+
+ skel = test_xdp_context_test_run__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+ prog_fd = bpf_program__fd(skel->progs.xdp_context);
+
+ /* Data past the end of the kernel's struct xdp_md must be 0 */
+ bad_ctx[sizeof(bad_ctx) - 1] = 1;
+ opts.ctx_in = bad_ctx;
+ opts.ctx_size_in = sizeof(bad_ctx);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_EQ(errno, E2BIG, "extradata-errno");
+ ASSERT_ERR(err, "bpf_prog_test_run(extradata)");
+
+ *(__u32 *)data = XDP_PASS;
+ *(struct ipv4_packet *)(data + sizeof(__u32)) = pkt_v4;
+ opts.ctx_in = &ctx_in;
+ opts.ctx_size_in = sizeof(ctx_in);
+ memset(&ctx_in, 0, sizeof(ctx_in));
+ ctx_in.data_meta = 0;
+ ctx_in.data = sizeof(__u32);
+ ctx_in.data_end = ctx_in.data + sizeof(pkt_v4);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(err, "bpf_prog_test_run(valid)");
+ ASSERT_EQ(opts.retval, XDP_PASS, "valid-retval");
+ ASSERT_EQ(opts.data_size_out, sizeof(pkt_v4), "valid-datasize");
+ ASSERT_EQ(opts.ctx_size_out, opts.ctx_size_in, "valid-ctxsize");
+ ASSERT_EQ(ctx_out.data_meta, 0, "valid-datameta");
+ ASSERT_EQ(ctx_out.data, 0, "valid-data");
+ ASSERT_EQ(ctx_out.data_end, sizeof(pkt_v4), "valid-dataend");
+
+ /* Meta data's size must be a multiple of 4 */
+ test_xdp_context_error(prog_fd, opts, 0, 1, sizeof(data), 0, 0, 0);
+
+ /* data_meta must reference the start of data */
+ test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data),
+ 0, 0, 0);
+
+ /* Meta data must be 32 bytes or smaller */
+ test_xdp_context_error(prog_fd, opts, 0, 36, sizeof(data), 0, 0, 0);
+
+ /* Total size of data must match data_end - data_meta */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
+ sizeof(data) - 1, 0, 0, 0);
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
+ sizeof(data) + 1, 0, 0, 0);
+
+ /* RX queue cannot be specified without specifying an ingress */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+ 0, 1, 0);
+
+ /* Interface 1 is always the loopback interface which always has only
+ * one RX queue (index 0). This makes index 1 an invalid rx queue index
+ * for interface 1.
+ */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+ 1, 1, 0);
+
+ /* The egress cannot be specified */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+ 0, 0, 1);
+
+ test_xdp_context_test_run__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index 0176573fe4e7..8755effd80b0 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -7,64 +7,53 @@
#define IFINDEX_LO 1
-void test_xdp_with_cpumap_helpers(void)
+void test_xdp_cpumap_attach(void)
{
struct test_xdp_with_cpumap_helpers *skel;
struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
struct bpf_cpumap_val val = {
.qsize = 192,
};
- __u32 duration = 0, idx = 0;
- __u32 len = sizeof(info);
int err, prog_fd, map_fd;
+ __u32 idx = 0;
skel = test_xdp_with_cpumap_helpers__open_and_load();
- if (CHECK_FAIL(!skel)) {
- perror("test_xdp_with_cpumap_helpers__open_and_load");
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load"))
return;
- }
- /* can not attach program with cpumaps that allow programs
- * as xdp generic
- */
prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP",
- "should have failed\n");
+ if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP"))
+ goto out_close;
+
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+ ASSERT_OK(err, "XDP program detach");
prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
map_fd = bpf_map__fd(skel->maps.cpu_map);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
goto out_close;
val.bpf_prog.fd = prog_fd;
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err, "Add program to cpumap entry", "err %d errno %d\n",
- err, errno);
+ ASSERT_OK(err, "Add program to cpumap entry");
err = bpf_map_lookup_elem(map_fd, &idx, &val);
- CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno);
- CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry",
- "expected %u read %u\n", info.id, val.bpf_prog.id);
+ ASSERT_OK(err, "Read cpumap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
/* can not attach BPF_XDP_CPUMAP program to a device */
err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program",
- "should have failed\n");
+ if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program"))
+ bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
val.qsize = 192;
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry",
- "should have failed\n");
+ ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
out_close:
test_xdp_with_cpumap_helpers__destroy(skel);
}
-
-void test_xdp_cpumap_attach(void)
-{
- if (test__start_subtest("cpumap_with_progs"))
- test_xdp_with_cpumap_helpers();
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 88ef3ec8ac4c..c72af030ff10 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -16,50 +16,45 @@ void test_xdp_with_devmap_helpers(void)
.ifindex = IFINDEX_LO,
};
__u32 len = sizeof(info);
- __u32 duration = 0, idx = 0;
int err, dm_fd, map_fd;
+ __u32 idx = 0;
skel = test_xdp_with_devmap_helpers__open_and_load();
- if (CHECK_FAIL(!skel)) {
- perror("test_xdp_with_devmap_helpers__open_and_load");
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load"))
return;
- }
- /* can not attach program with DEVMAPs that allow programs
- * as xdp generic
- */
dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Generic attach of program with 8-byte devmap",
- "should have failed\n");
+ if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap"))
+ goto out_close;
+
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+ ASSERT_OK(err, "XDP program detach");
dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
map_fd = bpf_map__fd(skel->maps.dm_ports);
err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
goto out_close;
val.bpf_prog.fd = dm_fd;
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err, "Add program to devmap entry",
- "err %d errno %d\n", err, errno);
+ ASSERT_OK(err, "Add program to devmap entry");
err = bpf_map_lookup_elem(map_fd, &idx, &val);
- CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno);
- CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry",
- "expected %u read %u\n", info.id, val.bpf_prog.id);
+ ASSERT_OK(err, "Read devmap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id");
/* can not attach BPF_XDP_DEVMAP program to a device */
err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program",
- "should have failed\n");
+ if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program"))
+ bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
val.ifindex = 1;
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry",
- "should have failed\n");
+ ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry");
out_close:
test_xdp_with_devmap_helpers__destroy(skel);
@@ -68,12 +63,10 @@ out_close:
void test_neg_xdp_devmap_helpers(void)
{
struct test_xdp_devmap_helpers *skel;
- __u32 duration = 0;
skel = test_xdp_devmap_helpers__open_and_load();
- if (CHECK(skel,
- "Load of XDP program accessing egress ifindex without attach type",
- "should have failed\n")) {
+ if (!ASSERT_EQ(skel, NULL,
+ "Load of XDP program accessing egress ifindex without attach type")) {
test_xdp_devmap_helpers__destroy(skel);
}
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index fd42247da8b4..9573be6122be 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -17,6 +17,11 @@
char _license[] SEC("license") = "GPL";
+volatile const char fallback[TCP_CA_NAME_MAX];
+const char bpf_dctcp[] = "bpf_dctcp";
+const char tcp_cdg[] = "cdg";
+char cc_res[TCP_CA_NAME_MAX];
+int tcp_cdg_res = 0;
int stg_result = 0;
struct {
@@ -57,6 +62,26 @@ void BPF_PROG(dctcp_init, struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
int *stg;
+ if (!(tp->ecn_flags & TCP_ECN_OK) && fallback[0]) {
+ /* Switch to fallback */
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)fallback, sizeof(fallback));
+ /* Switch back to myself which the bpf trampoline
+ * stopped calling dctcp_init recursively.
+ */
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)bpf_dctcp, sizeof(bpf_dctcp));
+ /* Switch back to fallback */
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)fallback, sizeof(fallback));
+ /* Expecting -ENOTSUPP for tcp_cdg_res */
+ tcp_cdg_res = bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)tcp_cdg, sizeof(tcp_cdg));
+ bpf_getsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)cc_res, sizeof(cc_res));
+ return;
+ }
+
ca->prior_rcv_nxt = tp->rcv_nxt;
ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
ca->loss_cwnd = 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
new file mode 100644
index 000000000000..d836f7c372f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+const char cubic[] = "cubic";
+
+void BPF_STRUCT_OPS(dctcp_nouse_release, struct sock *sk)
+{
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)cubic, sizeof(cubic));
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops dctcp_rel = {
+ .release = (void *)dctcp_nouse_release,
+ .name = "bpf_dctcp_rel",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index 3d83b185c4bc..8cfaeba1ddbf 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -12,6 +12,7 @@
#define tcp6_sock tcp6_sock___not_used
#define bpf_iter__udp bpf_iter__udp___not_used
#define udp6_sock udp6_sock___not_used
+#define bpf_iter__unix bpf_iter__unix___not_used
#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
#define bpf_iter__sockmap bpf_iter__sockmap___not_used
@@ -32,6 +33,7 @@
#undef tcp6_sock
#undef bpf_iter__udp
#undef udp6_sock
+#undef bpf_iter__unix
#undef bpf_iter__bpf_map_elem
#undef bpf_iter__bpf_sk_storage_map
#undef bpf_iter__sockmap
@@ -103,6 +105,12 @@ struct udp6_sock {
struct ipv6_pinfo inet6;
} __attribute__((preserve_access_index));
+struct bpf_iter__unix {
+ struct bpf_iter_meta *meta;
+ struct unix_sock *unix_sk;
+ uid_t uid;
+} __attribute__((preserve_access_index));
+
struct bpf_iter__bpf_map_elem {
struct bpf_iter_meta *meta;
struct bpf_map *map;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
new file mode 100644
index 000000000000..b77adfd55d73
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define bpf_tcp_sk(skc) ({ \
+ struct sock_common *_skc = skc; \
+ sk = NULL; \
+ tp = NULL; \
+ if (_skc) { \
+ tp = bpf_skc_to_tcp_sock(_skc); \
+ sk = (struct sock *)tp; \
+ } \
+ tp; \
+})
+
+unsigned short reuse_listen_hport = 0;
+unsigned short listen_hport = 0;
+char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic";
+char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp";
+bool random_retry = false;
+
+static bool tcp_cc_eq(const char *a, const char *b)
+{
+ int i;
+
+ for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+ if (a[i] != b[i])
+ return false;
+ if (!a[i])
+ break;
+ }
+
+ return true;
+}
+
+SEC("iter/tcp")
+int change_tcp_cc(struct bpf_iter__tcp *ctx)
+{
+ char cur_cc[TCP_CA_NAME_MAX];
+ struct tcp_sock *tp;
+ struct sock *sk;
+ int ret;
+
+ if (!bpf_tcp_sk(ctx->sk_common))
+ return 0;
+
+ if (sk->sk_family != AF_INET6 ||
+ (sk->sk_state != TCP_LISTEN &&
+ sk->sk_state != TCP_ESTABLISHED) ||
+ (sk->sk_num != reuse_listen_hport &&
+ sk->sk_num != listen_hport &&
+ bpf_ntohs(sk->sk_dport) != listen_hport))
+ return 0;
+
+ if (bpf_getsockopt(tp, SOL_TCP, TCP_CONGESTION,
+ cur_cc, sizeof(cur_cc)))
+ return 0;
+
+ if (!tcp_cc_eq(cur_cc, cubic_cc))
+ return 0;
+
+ if (random_retry && bpf_get_prandom_u32() % 4 == 1)
+ return 1;
+
+ bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, dctcp_cc, sizeof(dctcp_cc));
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
index 2e4775c35414..92267abb462f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -121,7 +121,7 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
}
BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
- seq_num, src, srcp, destp, destp);
+ seq_num, src, srcp, dest, destp);
BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
state,
tp->write_seq - tp->snd_una, rx_queue,
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
new file mode 100644
index 000000000000..94423902685d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+SEC("iter/unix")
+int dump_unix(struct bpf_iter__unix *ctx)
+{
+ struct unix_sock *unix_sk = ctx->unix_sk;
+ struct sock *sk = (struct sock *)unix_sk;
+ struct seq_file *seq;
+ __u32 seq_num;
+
+ if (!unix_sk)
+ return 0;
+
+ seq = ctx->meta->seq;
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "Num RefCount Protocol Flags Type St Inode Path\n");
+
+ BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %8lu",
+ unix_sk,
+ sk->sk_refcnt.refs.counter,
+ 0,
+ sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
+ sk->sk_type,
+ sk->sk_socket ?
+ (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
+ (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
+ sock_i_ino(sk));
+
+ if (unix_sk->addr) {
+ if (!UNIX_ABSTRACT(unix_sk)) {
+ BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
+ } else {
+ /* The name of the abstract UNIX domain socket starts
+ * with '\0' and can contain '\0'. The null bytes
+ * should be escaped as done in unix_seq_show().
+ */
+ __u64 i, len;
+
+ len = unix_sk->addr->len - sizeof(short);
+
+ BPF_SEQ_PRINTF(seq, " @");
+
+ for (i = 1; i < len; i++) {
+ /* unix_mkname() tests this upper bound. */
+ if (i >= sizeof(struct sockaddr_un))
+ break;
+
+ BPF_SEQ_PRINTF(seq, "%c",
+ unix_sk->addr->name->sun_path[i] ?:
+ '@');
+ }
+ }
+ }
+
+ BPF_SEQ_PRINTF(seq, "\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 01378911252b..eef5646ddb19 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -5,6 +5,14 @@
#define AF_INET 2
#define AF_INET6 10
+#define __SO_ACCEPTCON (1 << 16)
+#define UNIX_HASH_SIZE 256
+#define UNIX_ABSTRACT(unix_sk) (unix_sk->addr->hash < UNIX_HASH_SIZE)
+
+#define SOL_TCP 6
+#define TCP_CONGESTION 13
+#define TCP_CA_NAME_MAX 16
+
#define ICSK_TIME_RETRANS 1
#define ICSK_TIME_PROBE0 3
#define ICSK_TIME_LOSS_PROBE 5
@@ -32,6 +40,8 @@
#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr
#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr
+#define sk_num __sk_common.skc_num
+#define sk_dport __sk_common.skc_dport
#define sk_family __sk_common.skc_family
#define sk_rmem_alloc sk_backlog.rmem_alloc
#define sk_refcnt __sk_common.skc_refcnt
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
new file mode 100644
index 000000000000..a587aeca5ae0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+extern const void bpf_fentry_test1 __ksym;
+extern const void bpf_fentry_test2 __ksym;
+extern const void bpf_fentry_test3 __ksym;
+extern const void bpf_fentry_test4 __ksym;
+extern const void bpf_modify_return_test __ksym;
+extern const void bpf_fentry_test6 __ksym;
+extern const void bpf_fentry_test7 __ksym;
+
+__u64 test1_result = 0;
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test1_result = (const void *) addr == &bpf_fentry_test1;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fexit/bpf_fentry_test2")
+int BPF_PROG(test2, int a)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test2_result = (const void *) addr == &bpf_fentry_test2;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("kprobe/bpf_fentry_test3")
+int test3(struct pt_regs *ctx)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test3_result = (const void *) addr == &bpf_fentry_test3;
+ return 0;
+}
+
+__u64 test4_result = 0;
+SEC("kretprobe/bpf_fentry_test4")
+int BPF_KRETPROBE(test4)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test4_result = (const void *) addr == &bpf_fentry_test4;
+ return 0;
+}
+
+__u64 test5_result = 0;
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(test5, int a, int *b, int ret)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test5_result = (const void *) addr == &bpf_modify_return_test;
+ return ret;
+}
+
+__u64 test6_result = 0;
+SEC("kprobe/bpf_fentry_test6+0x5")
+int test6(struct pt_regs *ctx)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test6_result = (const void *) addr == &bpf_fentry_test6 + 5;
+ return 0;
+}
+
+__u64 test7_result = 0;
+SEC("kprobe/bpf_fentry_test7+5")
+int test7(struct pt_regs *ctx)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test7_result = (const void *) addr == &bpf_fentry_test7 + 5;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
index b2dcb7d9cb03..5fbd9e232d44 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
@@ -9,7 +9,7 @@ extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
__u32 c, __u64 d) __ksym;
extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
int active_res = -1;
-int sk_state = -1;
+int sk_state_res = -1;
int __noinline f1(struct __sk_buff *skb)
{
@@ -28,7 +28,7 @@ int __noinline f1(struct __sk_buff *skb)
if (active)
active_res = *active;
- sk_state = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
+ sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->sk_state;
return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
}
diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c
index d071adf178bd..43649bce4c54 100644
--- a/tools/testing/selftests/bpf/progs/netcnt_prog.c
+++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c
@@ -13,21 +13,21 @@
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
__type(key, struct bpf_cgroup_storage_key);
- __type(value, struct percpu_net_cnt);
+ __type(value, union percpu_net_cnt);
} percpu_netcnt SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
__type(key, struct bpf_cgroup_storage_key);
- __type(value, struct net_cnt);
+ __type(value, union net_cnt);
} netcnt SEC(".maps");
SEC("cgroup/skb")
int bpf_nextcnt(struct __sk_buff *skb)
{
- struct percpu_net_cnt *percpu_cnt;
+ union percpu_net_cnt *percpu_cnt;
char fmt[] = "%d %llu %llu\n";
- struct net_cnt *cnt;
+ union net_cnt *cnt;
__u64 ts, dt;
int ret;
diff --git a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
new file mode 100644
index 000000000000..aeff3a4f9287
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+#define AF_INET6 10
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sockops_netns_cookies SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_msg_netns_cookies SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sockops")
+int get_netns_cookie_sockops(struct bpf_sock_ops *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ int *cookie;
+ __u32 key = 0;
+
+ if (ctx->family != AF_INET6)
+ return 1;
+
+ if (!sk)
+ return 1;
+
+ switch (ctx->op) {
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ cookie = bpf_sk_storage_get(&sockops_netns_cookies, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!cookie)
+ return 1;
+
+ *cookie = bpf_get_netns_cookie(ctx);
+ break;
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ bpf_sock_map_update(ctx, &sock_map, &key, BPF_NOEXIST);
+ break;
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+SEC("sk_msg")
+int get_netns_cookie_sk_msg(struct sk_msg_md *msg)
+{
+ struct bpf_sock *sk = msg->sk;
+ int *cookie;
+
+ if (msg->family != AF_INET6)
+ return 1;
+
+ if (!sk)
+ return 1;
+
+ cookie = bpf_sk_storage_get(&sk_msg_netns_cookies, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!cookie)
+ return 1;
+
+ *cookie = bpf_get_netns_cookie(msg);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
new file mode 100644
index 000000000000..1bce83b6e3a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <string.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("cgroup/setsockopt")
+int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
+{
+ void *optval_end = ctx->optval_end;
+ int *optval = ctx->optval;
+ char buf[TCP_CA_NAME_MAX];
+ char cc_reno[TCP_CA_NAME_MAX] = "reno";
+ char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
+
+ if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS)
+ return 1;
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ if (bpf_getsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
+ return 0;
+
+ if (!tcp_cc_eq(buf, cc_cubic))
+ return 0;
+
+ if (*optval == 0x2d) {
+ if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &cc_reno,
+ sizeof(cc_reno)))
+ return 0;
+ }
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index 8acdb99b5959..79c8139b63b8 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -33,6 +33,14 @@ int _getsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
+ /* Make sure bpf_get_netns_cookie is callable.
+ */
+ if (bpf_get_netns_cookie(NULL) == 0)
+ return 0;
+
+ if (bpf_get_netns_cookie(ctx) == 0)
+ return 0;
+
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel
@@ -123,6 +131,14 @@ int _setsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
+ /* Make sure bpf_get_netns_cookie is callable.
+ */
+ if (bpf_get_netns_cookie(NULL) == 0)
+ return 0;
+
+ if (bpf_get_netns_cookie(ctx) == 0)
+ return 0;
+
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
new file mode 100644
index 000000000000..2d3a7710e2ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int my_tid;
+
+int kprobe_res;
+int kprobe_multi_res;
+int kretprobe_res;
+int uprobe_res;
+int uretprobe_res;
+int tp_res;
+int pe_res;
+
+static void update(void *ctx, int *res)
+{
+ if (my_tid != (u32)bpf_get_current_pid_tgid())
+ return;
+
+ *res |= bpf_get_attach_cookie(ctx);
+}
+
+SEC("kprobe/sys_nanosleep")
+int handle_kprobe(struct pt_regs *ctx)
+{
+ update(ctx, &kprobe_res);
+ return 0;
+}
+
+SEC("kretprobe/sys_nanosleep")
+int handle_kretprobe(struct pt_regs *ctx)
+{
+ update(ctx, &kretprobe_res);
+ return 0;
+}
+
+SEC("uprobe/trigger_func")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ update(ctx, &uprobe_res);
+ return 0;
+}
+
+SEC("uretprobe/trigger_func")
+int handle_uretprobe(struct pt_regs *ctx)
+{
+ update(ctx, &uretprobe_res);
+ return 0;
+}
+
+/* bpf_prog_array, used by kernel internally to keep track of attached BPF
+ * programs to a given BPF hook (e.g., for tracepoints) doesn't allow the same
+ * BPF program to be attached multiple times. So have three identical copies
+ * ready to attach to the same tracepoint.
+ */
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp1(struct pt_regs *ctx)
+{
+ update(ctx, &tp_res);
+ return 0;
+}
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp2(struct pt_regs *ctx)
+{
+ update(ctx, &tp_res);
+ return 0;
+}
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp3(void *ctx)
+{
+ update(ctx, &tp_res);
+ return 1;
+}
+
+SEC("perf_event")
+int handle_pe(struct pt_regs *ctx)
+{
+ update(ctx, &pe_res);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_core_autosize.c b/tools/testing/selftests/bpf/progs/test_core_autosize.c
index 44f5aa2e8956..9a7829c5e4a7 100644
--- a/tools/testing/selftests/bpf/progs/test_core_autosize.c
+++ b/tools/testing/selftests/bpf/progs/test_core_autosize.c
@@ -125,6 +125,16 @@ int handle_downsize(void *ctx)
return 0;
}
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define bpf_core_read_int bpf_core_read
+#else
+#define bpf_core_read_int(dst, sz, src) ({ \
+ /* Prevent "subtraction from stack pointer prohibited" */ \
+ volatile long __off = sizeof(*dst) - (sz); \
+ bpf_core_read((char *)(dst) + __off, sz, src); \
+})
+#endif
+
SEC("raw_tp/sys_enter")
int handle_probed(void *ctx)
{
@@ -132,23 +142,23 @@ int handle_probed(void *ctx)
__u64 tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
ptr_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val1), &in->val1);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val1), &in->val1);
val1_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val2), &in->val2);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val2), &in->val2);
val2_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val3), &in->val3);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val3), &in->val3);
val3_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val4), &in->val4);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val4), &in->val4);
val4_probed = tmp;
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
new file mode 100644
index 000000000000..5f8379aadb29
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test weak ksyms.
+ *
+ * Copyright (c) 2021 Google
+ */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+int out__existing_typed = -1;
+__u64 out__existing_typeless = -1;
+
+__u64 out__non_existent_typeless = -1;
+__u64 out__non_existent_typed = -1;
+
+/* existing weak symbols */
+
+/* test existing weak symbols can be resolved. */
+extern const struct rq runqueues __ksym __weak; /* typed */
+extern const void bpf_prog_active __ksym __weak; /* typeless */
+
+
+/* non-existent weak symbols. */
+
+/* typeless symbols, default to zero. */
+extern const void bpf_link_fops1 __ksym __weak;
+
+/* typed symbols, default to zero. */
+extern const int bpf_link_fops2 __ksym __weak;
+
+SEC("raw_tp/sys_enter")
+int pass_handler(const void *ctx)
+{
+ struct rq *rq;
+
+ /* tests existing symbols. */
+ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
+ if (rq)
+ out__existing_typed = rq->cpu;
+ out__existing_typeless = (__u64)&bpf_prog_active;
+
+ /* tests non-existent symbols. */
+ out__non_existent_typeless = (__u64)&bpf_link_fops1;
+
+ /* tests non-existent symbols. */
+ out__non_existent_typed = (__u64)&bpf_link_fops2;
+
+ if (&bpf_link_fops2) /* can't happen */
+ out__non_existent_typed = (__u64)bpf_per_cpu_ptr(&bpf_link_fops2, 0);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
new file mode 100644
index 000000000000..703c08e06442
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Isovalent, Inc. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, int);
+ __uint(max_entries, 4);
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 0); /* This will make map creation to fail */
+ __uint(key_size, sizeof(__u32));
+ __array(values, struct inner);
+} mim SEC(".maps");
+
+SEC("xdp")
+int xdp_noop0(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_link.c b/tools/testing/selftests/bpf/progs/test_perf_link.c
new file mode 100644
index 000000000000..c1db9fd98d0c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_perf_link.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int run_cnt = 0;
+
+SEC("perf_event")
+int handler(struct pt_regs *ctx)
+{
+ __sync_fetch_and_add(&run_cnt, 1);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index e83d0b48d80c..8249075f088f 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -91,7 +91,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_use_after_free")
+SEC("classifier/err_use_after_free")
int bpf_sk_lookup_uaf(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -106,7 +106,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
return family;
}
-SEC("classifier/fail_modify_sk_pointer")
+SEC("classifier/err_modify_sk_pointer")
int bpf_sk_lookup_modptr(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -121,7 +121,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_modify_sk_or_null_pointer")
+SEC("classifier/err_modify_sk_or_null_pointer")
int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -135,7 +135,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_no_release")
+SEC("classifier/err_no_release")
int bpf_sk_lookup_test2(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -144,7 +144,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_release_twice")
+SEC("classifier/err_release_twice")
int bpf_sk_lookup_test3(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -156,7 +156,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_release_unchecked")
+SEC("classifier/err_release_unchecked")
int bpf_sk_lookup_test4(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -173,7 +173,7 @@ void lookup_no_release(struct __sk_buff *skb)
bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
}
-SEC("classifier/fail_no_release_subcall")
+SEC("classifier/err_no_release_subcall")
int bpf_sk_lookup_test5(struct __sk_buff *skb)
{
lookup_no_release(skb);
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c
index e2ad26150f9b..8fda07544023 100644
--- a/tools/testing/selftests/bpf/progs/test_snprintf.c
+++ b/tools/testing/selftests/bpf/progs/test_snprintf.c
@@ -59,9 +59,9 @@ int handler(const void *ctx)
/* Kernel pointers */
addr_ret = BPF_SNPRINTF(addr_out, sizeof(addr_out), "%pK %px %p",
0, 0xFFFF00000ADD4E55, 0xFFFF00000ADD4E55);
- /* Strings embedding */
- str_ret = BPF_SNPRINTF(str_out, sizeof(str_out), "%s %+05s",
- str1, longstr);
+ /* Strings and single-byte character embedding */
+ str_ret = BPF_SNPRINTF(str_out, sizeof(str_out), "%s % 9c %+2c %-3c %04c %0c %+05s",
+ str1, 'a', 'b', 'c', 'd', 'e', longstr);
/* Overflow */
over_ret = BPF_SNPRINTF(over_out, sizeof(over_out), "%%overflow");
/* Padding of fixed width numbers */
diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
new file mode 100644
index 000000000000..6c059f1cfa1b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct pt_regs current_regs = {};
+struct pt_regs ctx_regs = {};
+int uprobe_res = 0;
+
+SEC("uprobe/trigger_func")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ struct task_struct *current;
+ struct pt_regs *regs;
+
+ current = bpf_get_current_task_btf();
+ regs = (struct pt_regs *) bpf_task_pt_regs(current);
+ __builtin_memcpy(&current_regs, regs, sizeof(*regs));
+ __builtin_memcpy(&ctx_regs, ctx, sizeof(*ctx));
+
+ /* Prove that uprobe was run */
+ uprobe_res = 1;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 84cd63259554..a0e7762b1e5a 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -528,7 +528,6 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb)
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
- char buf[sizeof(struct v6hdr)];
struct gre_hdr greh;
struct udphdr udph;
int olen = len;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c
new file mode 100644
index 000000000000..d7b88cd05afd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp")
+int xdp_context(struct xdp_md *xdp)
+{
+ void *data = (void *)(long)xdp->data;
+ __u32 *metadata = (void *)(long)xdp->data_meta;
+ __u32 ret;
+
+ if (metadata + 1 > data)
+ return XDP_ABORTED;
+ ret = *metadata;
+ if (bpf_xdp_adjust_meta(xdp, 4))
+ return XDP_ABORTED;
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
new file mode 100644
index 000000000000..5f5309791649
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+ int counter;
+ struct bpf_timer timer;
+ struct bpf_spin_lock lock; /* unused */
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap_malloc SEC(".maps");
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 4);
+ __type(key, int);
+ __type(value, struct elem);
+} lru SEC(".maps");
+
+__u64 bss_data;
+__u64 err;
+__u64 ok;
+__u64 callback_check = 52;
+__u64 callback2_check = 52;
+
+#define ARRAY 1
+#define HTAB 2
+#define HTAB_MALLOC 3
+#define LRU 4
+
+/* callback for array and lru timers */
+static int timer_cb1(void *map, int *key, struct bpf_timer *timer)
+{
+ /* increment bss variable twice.
+ * Once via array timer callback and once via lru timer callback
+ */
+ bss_data += 5;
+
+ /* *key == 0 - the callback was called for array timer.
+ * *key == 4 - the callback was called from lru timer.
+ */
+ if (*key == ARRAY) {
+ struct bpf_timer *lru_timer;
+ int lru_key = LRU;
+
+ /* rearm array timer to be called again in ~35 seconds */
+ if (bpf_timer_start(timer, 1ull << 35, 0) != 0)
+ err |= 1;
+
+ lru_timer = bpf_map_lookup_elem(&lru, &lru_key);
+ if (!lru_timer)
+ return 0;
+ bpf_timer_set_callback(lru_timer, timer_cb1);
+ if (bpf_timer_start(lru_timer, 0, 0) != 0)
+ err |= 2;
+ } else if (*key == LRU) {
+ int lru_key, i;
+
+ for (i = LRU + 1;
+ i <= 100 /* for current LRU eviction algorithm this number
+ * should be larger than ~ lru->max_entries * 2
+ */;
+ i++) {
+ struct elem init = {};
+
+ /* lru_key cannot be used as loop induction variable
+ * otherwise the loop will be unbounded.
+ */
+ lru_key = i;
+
+ /* add more elements into lru map to push out current
+ * element and force deletion of this timer
+ */
+ bpf_map_update_elem(map, &lru_key, &init, 0);
+ /* look it up to bump it into active list */
+ bpf_map_lookup_elem(map, &lru_key);
+
+ /* keep adding until *key changes underneath,
+ * which means that key/timer memory was reused
+ */
+ if (*key != LRU)
+ break;
+ }
+
+ /* check that the timer was removed */
+ if (bpf_timer_cancel(timer) != -EINVAL)
+ err |= 4;
+ ok |= 1;
+ }
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ struct bpf_timer *arr_timer, *lru_timer;
+ struct elem init = {};
+ int lru_key = LRU;
+ int array_key = ARRAY;
+
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
+
+ bpf_map_update_elem(&lru, &lru_key, &init, 0);
+ lru_timer = bpf_map_lookup_elem(&lru, &lru_key);
+ if (!lru_timer)
+ return 0;
+ bpf_timer_init(lru_timer, &lru, CLOCK_MONOTONIC);
+
+ bpf_timer_set_callback(arr_timer, timer_cb1);
+ bpf_timer_start(arr_timer, 0 /* call timer_cb1 asap */, 0);
+
+ /* init more timers to check that array destruction
+ * doesn't leak timer memory.
+ */
+ array_key = 0;
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
+ return 0;
+}
+
+/* callback for prealloc and non-prealloca hashtab timers */
+static int timer_cb2(void *map, int *key, struct hmap_elem *val)
+{
+ if (*key == HTAB)
+ callback_check--;
+ else
+ callback2_check--;
+ if (val->counter > 0 && --val->counter) {
+ /* re-arm the timer again to execute after 1 usec */
+ bpf_timer_start(&val->timer, 1000, 0);
+ } else if (*key == HTAB) {
+ struct bpf_timer *arr_timer;
+ int array_key = ARRAY;
+
+ /* cancel arr_timer otherwise bpf_fentry_test1 prog
+ * will stay alive forever.
+ */
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ if (bpf_timer_cancel(arr_timer) != 1)
+ /* bpf_timer_cancel should return 1 to indicate
+ * that arr_timer was active at this time
+ */
+ err |= 8;
+
+ /* try to cancel ourself. It shouldn't deadlock. */
+ if (bpf_timer_cancel(&val->timer) != -EDEADLK)
+ err |= 16;
+
+ /* delete this key and this timer anyway.
+ * It shouldn't deadlock either.
+ */
+ bpf_map_delete_elem(map, key);
+
+ /* in preallocated hashmap both 'key' and 'val' could have been
+ * reused to store another map element (like in LRU above),
+ * but in controlled test environment the below test works.
+ * It's not a use-after-free. The memory is owned by the map.
+ */
+ if (bpf_timer_start(&val->timer, 1000, 0) != -EINVAL)
+ err |= 32;
+ ok |= 2;
+ } else {
+ if (*key != HTAB_MALLOC)
+ err |= 64;
+
+ /* try to cancel ourself. It shouldn't deadlock. */
+ if (bpf_timer_cancel(&val->timer) != -EDEADLK)
+ err |= 128;
+
+ /* delete this key and this timer anyway.
+ * It shouldn't deadlock either.
+ */
+ bpf_map_delete_elem(map, key);
+
+ /* in non-preallocated hashmap both 'key' and 'val' are RCU
+ * protected and still valid though this element was deleted
+ * from the map. Arm this timer for ~35 seconds. When callback
+ * finishes the call_rcu will invoke:
+ * htab_elem_free_rcu
+ * check_and_free_timer
+ * bpf_timer_cancel_and_free
+ * to cancel this 35 second sleep and delete the timer for real.
+ */
+ if (bpf_timer_start(&val->timer, 1ull << 35, 0) != 0)
+ err |= 256;
+ ok |= 4;
+ }
+ return 0;
+}
+
+int bpf_timer_test(void)
+{
+ struct hmap_elem *val;
+ int key = HTAB, key_malloc = HTAB_MALLOC;
+
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val) {
+ if (bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME) != 0)
+ err |= 512;
+ bpf_timer_set_callback(&val->timer, timer_cb2);
+ bpf_timer_start(&val->timer, 1000, 0);
+ }
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val) {
+ if (bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME) != 0)
+ err |= 1024;
+ bpf_timer_set_callback(&val->timer, timer_cb2);
+ bpf_timer_start(&val->timer, 1000, 0);
+ }
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG(test2, int a, int b)
+{
+ struct hmap_elem init = {}, *val;
+ int key = HTAB, key_malloc = HTAB_MALLOC;
+
+ init.counter = 10; /* number of times to trigger timer_cb2 */
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+ /* update the same key to free the timer */
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+ /* update the same key to free the timer */
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+
+ /* init more timers to check that htab operations
+ * don't leak timer memory.
+ */
+ key = 0;
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+ bpf_map_delete_elem(&hmap, &key);
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+
+ /* and with non-prealloc htab */
+ key_malloc = 0;
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+ bpf_map_delete_elem(&hmap_malloc, &key_malloc);
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+
+ return bpf_timer_test();
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_mim.c b/tools/testing/selftests/bpf/progs/timer_mim.c
new file mode 100644
index 000000000000..2fee7ab105ef
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_mim.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+ int pad; /* unused */
+ struct bpf_timer timer;
+};
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1024);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} inner_htab SEC(".maps");
+
+#define ARRAY_KEY 1
+#define HASH_KEY 1234
+
+struct outer_arr {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct inner_map);
+} outer_arr SEC(".maps") = {
+ .values = { [ARRAY_KEY] = &inner_htab },
+};
+
+__u64 err;
+__u64 ok;
+__u64 cnt;
+
+static int timer_cb1(void *map, int *key, struct hmap_elem *val);
+
+static int timer_cb2(void *map, int *key, struct hmap_elem *val)
+{
+ cnt++;
+ bpf_timer_set_callback(&val->timer, timer_cb1);
+ if (bpf_timer_start(&val->timer, 1000, 0))
+ err |= 1;
+ ok |= 1;
+ return 0;
+}
+
+/* callback for inner hash map */
+static int timer_cb1(void *map, int *key, struct hmap_elem *val)
+{
+ cnt++;
+ bpf_timer_set_callback(&val->timer, timer_cb2);
+ if (bpf_timer_start(&val->timer, 1000, 0))
+ err |= 2;
+ /* Do a lookup to make sure 'map' and 'key' pointers are correct */
+ bpf_map_lookup_elem(map, key);
+ ok |= 2;
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ struct hmap_elem init = {};
+ struct bpf_map *inner_map;
+ struct hmap_elem *val;
+ int array_key = ARRAY_KEY;
+ int hash_key = HASH_KEY;
+
+ inner_map = bpf_map_lookup_elem(&outer_arr, &array_key);
+ if (!inner_map)
+ return 0;
+
+ bpf_map_update_elem(inner_map, &hash_key, &init, 0);
+ val = bpf_map_lookup_elem(inner_map, &hash_key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->timer, inner_map, CLOCK_MONOTONIC);
+ if (bpf_timer_set_callback(&val->timer, timer_cb1))
+ err |= 4;
+ if (bpf_timer_start(&val->timer, 0, 0))
+ err |= 8;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_mim_reject.c b/tools/testing/selftests/bpf/progs/timer_mim_reject.c
new file mode 100644
index 000000000000..5d648e3d8a41
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_mim_reject.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+ int pad; /* unused */
+ struct bpf_timer timer;
+};
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1024);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} inner_htab SEC(".maps");
+
+#define ARRAY_KEY 1
+#define ARRAY_KEY2 2
+#define HASH_KEY 1234
+
+struct outer_arr {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct inner_map);
+} outer_arr SEC(".maps") = {
+ .values = { [ARRAY_KEY] = &inner_htab },
+};
+
+__u64 err;
+__u64 ok;
+__u64 cnt;
+
+/* callback for inner hash map */
+static int timer_cb(void *map, int *key, struct hmap_elem *val)
+{
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ struct hmap_elem init = {};
+ struct bpf_map *inner_map, *inner_map2;
+ struct hmap_elem *val;
+ int array_key = ARRAY_KEY;
+ int array_key2 = ARRAY_KEY2;
+ int hash_key = HASH_KEY;
+
+ inner_map = bpf_map_lookup_elem(&outer_arr, &array_key);
+ if (!inner_map)
+ return 0;
+
+ inner_map2 = bpf_map_lookup_elem(&outer_arr, &array_key2);
+ if (!inner_map2)
+ return 0;
+ bpf_map_update_elem(inner_map, &hash_key, &init, 0);
+ val = bpf_map_lookup_elem(inner_map, &hash_key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->timer, inner_map2, CLOCK_MONOTONIC);
+ if (bpf_timer_set_callback(&val->timer, timer_cb))
+ err |= 4;
+ if (bpf_timer_start(&val->timer, 0, 0))
+ err |= 8;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c
index 94e6c2b281cb..5f725c720e00 100644
--- a/tools/testing/selftests/bpf/progs/xdp_tx.c
+++ b/tools/testing/selftests/bpf/progs/xdp_tx.c
@@ -3,7 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("tx")
+SEC("xdp")
int xdp_tx(struct xdp_md *xdp)
{
return XDP_TX;
diff --git a/tools/testing/selftests/bpf/test_bpftool.sh b/tools/testing/selftests/bpf/test_bpftool.sh
index 66690778e36d..718f59692ccb 100755
--- a/tools/testing/selftests/bpf/test_bpftool.sh
+++ b/tools/testing/selftests/bpf/test_bpftool.sh
@@ -2,4 +2,10 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2020 SUSE LLC.
+# 'make -C tools/testing/selftests/bpf install' will install to SCRIPT_DIR
+SCRIPT_DIR=$(dirname $(realpath $0))
+
+# 'make -C tools/testing/selftests/bpf' will install to BPFTOOL_INSTALL_PATH
+BPFTOOL_INSTALL_PATH="$SCRIPT_DIR"/tools/sbin
+export PATH=$SCRIPT_DIR:$BPFTOOL_INSTALL_PATH:$PATH
python3 -m unittest -v test_bpftool.TestBpftool
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index ac349a5cea7e..b03a87571592 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -22,7 +22,7 @@ KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
cd $KDIR_ROOT_DIR
if [ ! -e tools/bpf/bpftool/Makefile ]; then
echo -e "skip: bpftool files not found!\n"
- exit 0
+ exit 4 # KSFT_SKIP=4
fi
ERROR=0
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
new file mode 100755
index 000000000000..be54b7335a76
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -0,0 +1,586 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+#
+# Copyright (C) 2021 Isovalent, Inc.
+
+import argparse
+import re
+import os, sys
+
+LINUX_ROOT = os.path.abspath(os.path.join(__file__,
+ os.pardir, os.pardir, os.pardir, os.pardir, os.pardir))
+BPFTOOL_DIR = os.path.join(LINUX_ROOT, 'tools/bpf/bpftool')
+retval = 0
+
+class BlockParser(object):
+ """
+ A parser for extracting set of values from blocks such as enums.
+ @reader: a pointer to the open file to parse
+ """
+ def __init__(self, reader):
+ self.reader = reader
+
+ def search_block(self, start_marker):
+ """
+ Search for a given structure in a file.
+ @start_marker: regex marking the beginning of a structure to parse
+ """
+ offset = self.reader.tell()
+ array_start = re.search(start_marker, self.reader.read())
+ if array_start is None:
+ raise Exception('Failed to find start of block')
+ self.reader.seek(offset + array_start.start())
+
+ def parse(self, pattern, end_marker):
+ """
+ Parse a block and return a set of values. Values to extract must be
+ on separate lines in the file.
+ @pattern: pattern used to identify the values to extract
+ @end_marker: regex marking the end of the block to parse
+ """
+ entries = set()
+ while True:
+ line = self.reader.readline()
+ if not line or re.match(end_marker, line):
+ break
+ capture = pattern.search(line)
+ if capture and pattern.groups >= 1:
+ entries.add(capture.group(1))
+ return entries
+
+class ArrayParser(BlockParser):
+ """
+ A parser for extracting dicionaries of values from some BPF-related arrays.
+ @reader: a pointer to the open file to parse
+ @array_name: name of the array to parse
+ """
+ end_marker = re.compile('^};')
+
+ def __init__(self, reader, array_name):
+ self.array_name = array_name
+ self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n')
+ super().__init__(reader)
+
+ def search_block(self):
+ """
+ Search for the given array in a file.
+ """
+ super().search_block(self.start_marker);
+
+ def parse(self):
+ """
+ Parse a block and return data as a dictionary. Items to extract must be
+ on separate lines in the file.
+ """
+ pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$')
+ entries = {}
+ while True:
+ line = self.reader.readline()
+ if line == '' or re.match(self.end_marker, line):
+ break
+ capture = pattern.search(line)
+ if capture:
+ entries[capture.group(1)] = capture.group(2)
+ return entries
+
+class InlineListParser(BlockParser):
+ """
+ A parser for extracting set of values from inline lists.
+ """
+ def parse(self, pattern, end_marker):
+ """
+ Parse a block and return a set of values. Multiple values to extract
+ can be on a same line in the file.
+ @pattern: pattern used to identify the values to extract
+ @end_marker: regex marking the end of the block to parse
+ """
+ entries = set()
+ while True:
+ line = self.reader.readline()
+ if not line:
+ break
+ entries.update(pattern.findall(line))
+ if re.search(end_marker, line):
+ break
+ return entries
+
+class FileExtractor(object):
+ """
+ A generic reader for extracting data from a given file. This class contains
+ several helper methods that wrap arround parser objects to extract values
+ from different structures.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def __init__(self):
+ self.reader = open(self.filename, 'r')
+
+ def close(self):
+ """
+ Close the file used by the parser.
+ """
+ self.reader.close()
+
+ def reset_read(self):
+ """
+ Reset the file position indicator for this parser. This is useful when
+ parsing several structures in the file without respecting the order in
+ which those structures appear in the file.
+ """
+ self.reader.seek(0)
+
+ def get_types_from_array(self, array_name):
+ """
+ Search for and parse an array associating names to BPF_* enum members,
+ for example:
+
+ const char * const prog_type_name[] = {
+ [BPF_PROG_TYPE_UNSPEC] = "unspec",
+ [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
+ [BPF_PROG_TYPE_KPROBE] = "kprobe",
+ };
+
+ Return a dictionary with the enum member names as keys and the
+ associated names as values, for example:
+
+ {'BPF_PROG_TYPE_UNSPEC': 'unspec',
+ 'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter',
+ 'BPF_PROG_TYPE_KPROBE': 'kprobe'}
+
+ @array_name: name of the array to parse
+ """
+ array_parser = ArrayParser(self.reader, array_name)
+ array_parser.search_block()
+ return array_parser.parse()
+
+ def get_enum(self, enum_name):
+ """
+ Search for and parse an enum containing BPF_* members, for example:
+
+ enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
+ };
+
+ Return a set containing all member names, for example:
+
+ {'BPF_PROG_TYPE_UNSPEC',
+ 'BPF_PROG_TYPE_SOCKET_FILTER',
+ 'BPF_PROG_TYPE_KPROBE'}
+
+ @enum_name: name of the enum to parse
+ """
+ start_marker = re.compile(f'enum {enum_name} {{\n')
+ pattern = re.compile('^\s*(BPF_\w+),?$')
+ end_marker = re.compile('^};')
+ parser = BlockParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+ def __get_description_list(self, start_marker, pattern, end_marker):
+ parser = InlineListParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+ def get_rst_list(self, block_name):
+ """
+ Search for and parse a list of type names from RST documentation, for
+ example:
+
+ | *TYPE* := {
+ | **socket** | **kprobe** |
+ | **kretprobe**
+ | }
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+ @block_name: name of the blog to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(f'\*{block_name}\* := {{')
+ pattern = re.compile('\*\*([\w/-]+)\*\*')
+ end_marker = re.compile('}\n')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def get_help_list(self, block_name):
+ """
+ Search for and parse a list of type names from a help message in
+ bpftool, for example:
+
+ " TYPE := { socket | kprobe |\\n"
+ " kretprobe }\\n"
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+ @block_name: name of the blog to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(f'"\s*{block_name} := {{')
+ pattern = re.compile('([\w/]+) [|}]')
+ end_marker = re.compile('}')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def get_help_list_macro(self, macro):
+ """
+ Search for and parse a list of values from a help message starting with
+ a macro in bpftool, for example:
+
+ " " HELP_SPEC_OPTIONS " |\\n"
+ " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} }\\n"
+
+ Return a set containing all item names, for example:
+
+ {'-f', '--bpffs', '-m', '--mapcompat', '-n', '--nomount'}
+
+ @macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example
+ """
+ start_marker = re.compile(f'"\s*{macro}\s*" [|}}]')
+ pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])')
+ end_marker = re.compile('}\\\\n')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def default_options(self):
+ """
+ Return the default options contained in HELP_SPEC_OPTIONS
+ """
+ return { '-j', '--json', '-p', '--pretty', '-d', '--debug' }
+
+ def get_bashcomp_list(self, block_name):
+ """
+ Search for and parse a list of type names from a variable in bash
+ completion file, for example:
+
+ local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \\
+ kretprobe'
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+ @block_name: name of the blog to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(f'local {block_name}=\'')
+ pattern = re.compile('(?:.*=\')?([\w/]+)')
+ end_marker = re.compile('\'$')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+class SourceFileExtractor(FileExtractor):
+ """
+ An abstract extractor for a source file with usage message.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def get_options(self):
+ return self.default_options().union(self.get_help_list_macro('HELP_SPEC_OPTIONS'))
+
+class ProgFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's prog.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'prog.c')
+
+ def get_prog_types(self):
+ return self.get_types_from_array('prog_type_name')
+
+ def get_attach_types(self):
+ return self.get_types_from_array('attach_type_strings')
+
+ def get_prog_attach_help(self):
+ return self.get_help_list('ATTACH_TYPE')
+
+class MapFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's map.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'map.c')
+
+ def get_map_types(self):
+ return self.get_types_from_array('map_type_name')
+
+ def get_map_help(self):
+ return self.get_help_list('TYPE')
+
+class CgroupFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's cgroup.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'cgroup.c')
+
+ def get_prog_attach_help(self):
+ return self.get_help_list('ATTACH_TYPE')
+
+class CommonFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's common.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'common.c')
+
+ def __init__(self):
+ super().__init__()
+ self.attach_types = {}
+
+ def get_attach_types(self):
+ if not self.attach_types:
+ self.attach_types = self.get_types_from_array('attach_type_name')
+ return self.attach_types
+
+ def get_cgroup_attach_types(self):
+ if not self.attach_types:
+ self.get_attach_types()
+ cgroup_types = {}
+ for (key, value) in self.attach_types.items():
+ if key.find('BPF_CGROUP') != -1:
+ cgroup_types[key] = value
+ return cgroup_types
+
+class GenericSourceExtractor(SourceFileExtractor):
+ """
+ An extractor for generic source code files.
+ """
+ filename = ""
+
+ def __init__(self, filename):
+ self.filename = os.path.join(BPFTOOL_DIR, filename)
+ super().__init__()
+
+class BpfHeaderExtractor(FileExtractor):
+ """
+ An extractor for the UAPI BPF header.
+ """
+ filename = os.path.join(LINUX_ROOT, 'tools/include/uapi/linux/bpf.h')
+
+ def get_prog_types(self):
+ return self.get_enum('bpf_prog_type')
+
+ def get_map_types(self):
+ return self.get_enum('bpf_map_type')
+
+ def get_attach_types(self):
+ return self.get_enum('bpf_attach_type')
+
+class ManPageExtractor(FileExtractor):
+ """
+ An abstract extractor for an RST documentation page.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def get_options(self):
+ return self.get_rst_list('OPTIONS')
+
+class ManProgExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-prog.rst.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-prog.rst')
+
+ def get_attach_types(self):
+ return self.get_rst_list('ATTACH_TYPE')
+
+class ManMapExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-map.rst.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-map.rst')
+
+ def get_map_types(self):
+ return self.get_rst_list('TYPE')
+
+class ManCgroupExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-cgroup.rst.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-cgroup.rst')
+
+ def get_attach_types(self):
+ return self.get_rst_list('ATTACH_TYPE')
+
+class ManGenericExtractor(ManPageExtractor):
+ """
+ An extractor for generic RST documentation pages.
+ """
+ filename = ""
+
+ def __init__(self, filename):
+ self.filename = os.path.join(BPFTOOL_DIR, filename)
+ super().__init__()
+
+class BashcompExtractor(FileExtractor):
+ """
+ An extractor for bpftool's bash completion file.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'bash-completion/bpftool')
+
+ def get_prog_attach_types(self):
+ return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES')
+
+ def get_map_types(self):
+ return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES')
+
+ def get_cgroup_attach_types(self):
+ return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES')
+
+def verify(first_set, second_set, message):
+ """
+ Print all values that differ between two sets.
+ @first_set: one set to compare
+ @second_set: another set to compare
+ @message: message to print for values belonging to only one of the sets
+ """
+ global retval
+ diff = first_set.symmetric_difference(second_set)
+ if diff:
+ print(message, diff)
+ retval = 1
+
+def main():
+ # No arguments supported at this time, but print usage for -h|--help
+ argParser = argparse.ArgumentParser(description="""
+ Verify that bpftool's code, help messages, documentation and bash
+ completion are all in sync on program types, map types, attach types, and
+ options. Also check that bpftool is in sync with the UAPI BPF header.
+ """)
+ args = argParser.parse_args()
+
+ # Map types (enum)
+
+ bpf_info = BpfHeaderExtractor()
+ ref = bpf_info.get_map_types()
+
+ map_info = MapFileExtractor()
+ source_map_items = map_info.get_map_types()
+ map_types_enum = set(source_map_items.keys())
+
+ verify(ref, map_types_enum,
+ f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):')
+
+ # Map types (names)
+
+ source_map_types = set(source_map_items.values())
+ source_map_types.discard('unspec')
+
+ help_map_types = map_info.get_map_help()
+ help_map_options = map_info.get_options()
+ map_info.close()
+
+ man_map_info = ManMapExtractor()
+ man_map_options = man_map_info.get_options()
+ man_map_types = man_map_info.get_map_types()
+ man_map_info.close()
+
+ bashcomp_info = BashcompExtractor()
+ bashcomp_map_types = bashcomp_info.get_map_types()
+
+ verify(source_map_types, help_map_types,
+ f'Comparing {MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):')
+ verify(source_map_types, man_map_types,
+ f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):')
+ verify(help_map_options, man_map_options,
+ f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):')
+ verify(source_map_types, bashcomp_map_types,
+ f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):')
+
+ # Program types (enum)
+
+ ref = bpf_info.get_prog_types()
+
+ prog_info = ProgFileExtractor()
+ prog_types = set(prog_info.get_prog_types().keys())
+
+ verify(ref, prog_types,
+ f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):')
+
+ # Attach types (enum)
+
+ ref = bpf_info.get_attach_types()
+ bpf_info.close()
+
+ common_info = CommonFileExtractor()
+ attach_types = common_info.get_attach_types()
+
+ verify(ref, attach_types,
+ f'Comparing BPF header (enum bpf_attach_type) and {CommonFileExtractor.filename} (attach_type_name):')
+
+ # Attach types (names)
+
+ source_prog_attach_types = set(prog_info.get_attach_types().values())
+
+ help_prog_attach_types = prog_info.get_prog_attach_help()
+ help_prog_options = prog_info.get_options()
+ prog_info.close()
+
+ man_prog_info = ManProgExtractor()
+ man_prog_options = man_prog_info.get_options()
+ man_prog_attach_types = man_prog_info.get_attach_types()
+ man_prog_info.close()
+
+ bashcomp_info.reset_read() # We stopped at map types, rewind
+ bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types()
+
+ verify(source_prog_attach_types, help_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ verify(source_prog_attach_types, man_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):')
+ verify(help_prog_options, man_prog_options,
+ f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):')
+ verify(source_prog_attach_types, bashcomp_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):')
+
+ # Cgroup attach types
+
+ source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values())
+ common_info.close()
+
+ cgroup_info = CgroupFileExtractor()
+ help_cgroup_attach_types = cgroup_info.get_prog_attach_help()
+ help_cgroup_options = cgroup_info.get_options()
+ cgroup_info.close()
+
+ man_cgroup_info = ManCgroupExtractor()
+ man_cgroup_options = man_cgroup_info.get_options()
+ man_cgroup_attach_types = man_cgroup_info.get_attach_types()
+ man_cgroup_info.close()
+
+ bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types()
+ bashcomp_info.close()
+
+ verify(source_cgroup_attach_types, help_cgroup_attach_types,
+ f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ verify(source_cgroup_attach_types, man_cgroup_attach_types,
+ f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):')
+ verify(help_cgroup_options, man_cgroup_options,
+ f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):')
+ verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types,
+ f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):')
+
+ # Options for remaining commands
+
+ for cmd in [ 'btf', 'feature', 'gen', 'iter', 'link', 'net', 'perf', 'struct_ops', ]:
+ source_info = GenericSourceExtractor(cmd + '.c')
+ help_cmd_options = source_info.get_options()
+ source_info.close()
+
+ man_cmd_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool-' + cmd + '.rst'))
+ man_cmd_options = man_cmd_info.get_options()
+ man_cmd_info.close()
+
+ verify(help_cmd_options, man_cmd_options,
+ f'Comparing {source_info.filename} (do_help() OPTIONS) and {man_cmd_info.filename} (OPTIONS):')
+
+ source_main_info = GenericSourceExtractor('main.c')
+ help_main_options = source_main_info.get_options()
+ source_main_info.close()
+
+ man_main_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool.rst'))
+ man_main_options = man_main_info.get_options()
+ man_main_info.close()
+
+ verify(help_main_options, man_main_options,
+ f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):')
+
+ sys.exit(retval)
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh
index ed12111cd2f0..679cf968c7d1 100755
--- a/tools/testing/selftests/bpf/test_doc_build.sh
+++ b/tools/testing/selftests/bpf/test_doc_build.sh
@@ -4,11 +4,17 @@ set -e
# Assume script is located under tools/testing/selftests/bpf/. We want to start
# build attempts from the top of kernel repository.
-SCRIPT_REL_PATH=$(realpath --relative-to=$PWD $0)
+SCRIPT_REL_PATH=$(realpath $0)
SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH)
-KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
+KDIR_ROOT_DIR=$(realpath $SCRIPT_REL_DIR/../../../../)
+SCRIPT_REL_DIR=$(dirname $(realpath --relative-to=$KDIR_ROOT_DIR $SCRIPT_REL_PATH))
cd $KDIR_ROOT_DIR
+if [ ! -e $PWD/$SCRIPT_REL_DIR/Makefile ]; then
+ echo -e "skip: bpftool files not found!\n"
+ exit 4 # KSFT_SKIP=4
+fi
+
for tgt in docs docs-clean; do
make -s -C $PWD/$SCRIPT_REL_DIR $tgt;
done
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 30cbf5d98f7d..c7a36a9378f8 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -764,8 +764,8 @@ static void test_sockmap(unsigned int tasks, void *data)
udp = socket(AF_INET, SOCK_DGRAM, 0);
i = 0;
err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
- if (!err) {
- printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
+ if (err) {
+ printf("Failed socket update SOCK_DGRAM '%i:%i'\n",
i, udp);
goto out_sockmap;
}
@@ -985,7 +985,7 @@ static void test_sockmap(unsigned int tasks, void *data)
FD_ZERO(&w);
FD_SET(sfd[3], &w);
- to.tv_sec = 1;
+ to.tv_sec = 30;
to.tv_usec = 0;
s = select(sfd[3] + 1, &w, NULL, NULL, &to);
if (s == -1) {
@@ -1153,12 +1153,17 @@ out_sockmap:
}
#define MAPINMAP_PROG "./test_map_in_map.o"
+#define MAPINMAP_INVALID_PROG "./test_map_in_map_invalid.o"
static void test_map_in_map(void)
{
struct bpf_object *obj;
struct bpf_map *map;
int mim_fd, fd, err;
int pos = 0;
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ __u32 id = 0;
+ libbpf_print_fn_t old_print_fn;
obj = bpf_object__open(MAPINMAP_PROG);
@@ -1228,11 +1233,72 @@ static void test_map_in_map(void)
}
close(fd);
+ fd = -1;
bpf_object__close(obj);
+
+ /* Test that failing bpf_object__create_map() destroys the inner map */
+ obj = bpf_object__open(MAPINMAP_INVALID_PROG);
+ err = libbpf_get_error(obj);
+ if (err) {
+ printf("Failed to load %s program: %d %d",
+ MAPINMAP_INVALID_PROG, err, errno);
+ goto out_map_in_map;
+ }
+
+ map = bpf_object__find_map_by_name(obj, "mim");
+ if (!map) {
+ printf("Failed to load array of maps from test prog\n");
+ goto out_map_in_map;
+ }
+
+ old_print_fn = libbpf_set_print(NULL);
+
+ err = bpf_object__load(obj);
+ if (!err) {
+ printf("Loading obj supposed to fail\n");
+ goto out_map_in_map;
+ }
+
+ libbpf_set_print(old_print_fn);
+
+ /* Iterate over all maps to check whether the internal map
+ * ("mim.internal") has been destroyed.
+ */
+ while (true) {
+ err = bpf_map_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT)
+ break;
+ printf("Failed to get next map: %d", errno);
+ goto out_map_in_map;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ printf("Failed to get map by id %u: %d", id, errno);
+ goto out_map_in_map;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ printf("Failed to get map info by fd %d: %d", fd,
+ errno);
+ goto out_map_in_map;
+ }
+
+ if (!strcmp(info.name, "mim.inner")) {
+ printf("Inner map mim.inner was not destroyed\n");
+ goto out_map_in_map;
+ }
+ }
+
return;
out_map_in_map:
- close(fd);
+ if (fd >= 0)
+ close(fd);
exit(1);
}
@@ -1330,15 +1396,22 @@ static void test_map_stress(void)
#define DO_DELETE 0
#define MAP_RETRIES 20
+#define MAX_DELAY_US 50000
+#define MIN_DELAY_RANGE_US 5000
static int map_update_retriable(int map_fd, const void *key, const void *value,
int flags, int attempts)
{
+ int delay = rand() % MIN_DELAY_RANGE_US;
+
while (bpf_map_update_elem(map_fd, key, value, flags)) {
if (!attempts || (errno != EAGAIN && errno != EBUSY))
return -errno;
- usleep(1);
+ if (delay <= MAX_DELAY_US / 2)
+ delay *= 2;
+
+ usleep(delay);
attempts--;
}
@@ -1347,11 +1420,16 @@ static int map_update_retriable(int map_fd, const void *key, const void *value,
static int map_delete_retriable(int map_fd, const void *key, int attempts)
{
+ int delay = rand() % MIN_DELAY_RANGE_US;
+
while (bpf_map_delete_elem(map_fd, key)) {
if (!attempts || (errno != EAGAIN && errno != EBUSY))
return -errno;
- usleep(1);
+ if (delay <= MAX_DELAY_US / 2)
+ delay *= 2;
+
+ usleep(delay);
attempts--;
}
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
deleted file mode 100644
index a7b9a69f4fd5..000000000000
--- a/tools/testing/selftests/bpf/test_netcnt.c
+++ /dev/null
@@ -1,148 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/sysinfo.h>
-#include <sys/time.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-#include "netcnt_common.h"
-
-#define BPF_PROG "./netcnt_prog.o"
-#define TEST_CGROUP "/test-network-counters/"
-
-static int bpf_find_map(const char *test, struct bpf_object *obj,
- const char *name)
-{
- struct bpf_map *map;
-
- map = bpf_object__find_map_by_name(obj, name);
- if (!map) {
- printf("%s:FAIL:map '%s' not found\n", test, name);
- return -1;
- }
- return bpf_map__fd(map);
-}
-
-int main(int argc, char **argv)
-{
- struct percpu_net_cnt *percpu_netcnt;
- struct bpf_cgroup_storage_key key;
- int map_fd, percpu_map_fd;
- int error = EXIT_FAILURE;
- struct net_cnt netcnt;
- struct bpf_object *obj;
- int prog_fd, cgroup_fd;
- unsigned long packets;
- unsigned long bytes;
- int cpu, nproc;
- __u32 prog_cnt;
-
- nproc = get_nprocs_conf();
- percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
- if (!percpu_netcnt) {
- printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
- goto err;
- }
-
- if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB,
- &obj, &prog_fd)) {
- printf("Failed to load bpf program\n");
- goto out;
- }
-
- cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
- if (cgroup_fd < 0)
- goto err;
-
- /* Attach bpf program */
- if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
- printf("Failed to attach bpf program");
- goto err;
- }
-
- if (system("which ping6 &>/dev/null") == 0)
- assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null"));
- else
- assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null"));
-
- if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
- &prog_cnt)) {
- printf("Failed to query attached programs");
- goto err;
- }
-
- map_fd = bpf_find_map(__func__, obj, "netcnt");
- if (map_fd < 0) {
- printf("Failed to find bpf map with net counters");
- goto err;
- }
-
- percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt");
- if (percpu_map_fd < 0) {
- printf("Failed to find bpf map with percpu net counters");
- goto err;
- }
-
- if (bpf_map_get_next_key(map_fd, NULL, &key)) {
- printf("Failed to get key in cgroup storage\n");
- goto err;
- }
-
- if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) {
- printf("Failed to lookup cgroup storage\n");
- goto err;
- }
-
- if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) {
- printf("Failed to lookup percpu cgroup storage\n");
- goto err;
- }
-
- /* Some packets can be still in per-cpu cache, but not more than
- * MAX_PERCPU_PACKETS.
- */
- packets = netcnt.packets;
- bytes = netcnt.bytes;
- for (cpu = 0; cpu < nproc; cpu++) {
- if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) {
- printf("Unexpected percpu value: %llu\n",
- percpu_netcnt[cpu].packets);
- goto err;
- }
-
- packets += percpu_netcnt[cpu].packets;
- bytes += percpu_netcnt[cpu].bytes;
- }
-
- /* No packets should be lost */
- if (packets != 10000) {
- printf("Unexpected packet count: %lu\n", packets);
- goto err;
- }
-
- /* Let's check that bytes counter matches the number of packets
- * multiplied by the size of ipv6 ICMP packet.
- */
- if (bytes != packets * 104) {
- printf("Unexpected bytes count: %lu\n", bytes);
- goto err;
- }
-
- error = 0;
- printf("test_netcnt:PASS\n");
-
-err:
- cleanup_cgroup_environment();
- free(percpu_netcnt);
-
-out:
- return error;
-}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 6f103106a39b..cc1cd240445d 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -13,6 +13,28 @@
#include <execinfo.h> /* backtrace */
#include <linux/membarrier.h>
+/* Adapted from perf/util/string.c */
+static bool glob_match(const char *str, const char *pat)
+{
+ while (*str && *pat && *pat != '*') {
+ if (*str != *pat)
+ return false;
+ str++;
+ pat++;
+ }
+ /* Check wild card */
+ if (*pat == '*') {
+ while (*pat == '*')
+ pat++;
+ if (!*pat) /* Tail wild card matches all */
+ return true;
+ while (*str)
+ if (glob_match(str++, pat))
+ return true;
+ }
+ return !*str && !*pat;
+}
+
#define EXIT_NO_TEST 2
#define EXIT_ERR_SETUP_INFRA 3
@@ -55,12 +77,12 @@ static bool should_run(struct test_selector *sel, int num, const char *name)
int i;
for (i = 0; i < sel->blacklist.cnt; i++) {
- if (strstr(name, sel->blacklist.strs[i]))
+ if (glob_match(name, sel->blacklist.strs[i]))
return false;
}
for (i = 0; i < sel->whitelist.cnt; i++) {
- if (strstr(name, sel->whitelist.strs[i]))
+ if (glob_match(name, sel->whitelist.strs[i]))
return true;
}
@@ -148,18 +170,18 @@ void test__end_subtest()
struct prog_test_def *test = env.test;
int sub_error_cnt = test->error_cnt - test->old_error_cnt;
+ dump_test_log(test, sub_error_cnt);
+
+ fprintf(env.stdout, "#%d/%d %s/%s:%s\n",
+ test->test_num, test->subtest_num, test->test_name, test->subtest_name,
+ sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+
if (sub_error_cnt)
env.fail_cnt++;
else if (test->skip_cnt == 0)
env.sub_succ_cnt++;
skip_account();
- dump_test_log(test, sub_error_cnt);
-
- fprintf(env.stdout, "#%d/%d %s:%s\n",
- test->test_num, test->subtest_num, test->subtest_name,
- sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
-
free(test->subtest_name);
test->subtest_name = NULL;
}
@@ -450,6 +472,8 @@ enum ARG_KEYS {
ARG_VERBOSE = 'v',
ARG_GET_TEST_CNT = 'c',
ARG_LIST_TEST_NAMES = 'l',
+ ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
+ ARG_TEST_NAME_GLOB_DENYLIST = 'd',
};
static const struct argp_option opts[] = {
@@ -467,6 +491,10 @@ static const struct argp_option opts[] = {
"Get number of selected top-level tests " },
{ "list", ARG_LIST_TEST_NAMES, NULL, 0,
"List test names that would run (without running them) " },
+ { "allow", ARG_TEST_NAME_GLOB_ALLOWLIST, "NAMES", 0,
+ "Run tests with name matching the pattern (supports '*' wildcard)." },
+ { "deny", ARG_TEST_NAME_GLOB_DENYLIST, "NAMES", 0,
+ "Don't run tests with name matching the pattern (supports '*' wildcard)." },
{},
};
@@ -491,36 +519,48 @@ static void free_str_set(const struct str_set *set)
free(set->strs);
}
-static int parse_str_list(const char *s, struct str_set *set)
+static int parse_str_list(const char *s, struct str_set *set, bool is_glob_pattern)
{
char *input, *state = NULL, *next, **tmp, **strs = NULL;
- int cnt = 0;
+ int i, cnt = 0;
input = strdup(s);
if (!input)
return -ENOMEM;
- set->cnt = 0;
- set->strs = NULL;
-
while ((next = strtok_r(state ? NULL : input, ",", &state))) {
tmp = realloc(strs, sizeof(*strs) * (cnt + 1));
if (!tmp)
goto err;
strs = tmp;
- strs[cnt] = strdup(next);
- if (!strs[cnt])
- goto err;
+ if (is_glob_pattern) {
+ strs[cnt] = strdup(next);
+ if (!strs[cnt])
+ goto err;
+ } else {
+ strs[cnt] = malloc(strlen(next) + 2 + 1);
+ if (!strs[cnt])
+ goto err;
+ sprintf(strs[cnt], "*%s*", next);
+ }
cnt++;
}
- set->cnt = cnt;
- set->strs = (const char **)strs;
+ tmp = realloc(set->strs, sizeof(*strs) * (cnt + set->cnt));
+ if (!tmp)
+ goto err;
+ memcpy(tmp + set->cnt, strs, sizeof(*strs) * cnt);
+ set->strs = (const char **)tmp;
+ set->cnt += cnt;
+
free(input);
+ free(strs);
return 0;
err:
+ for (i = 0; i < cnt; i++)
+ free(strs[i]);
free(strs);
free(input);
return -ENOMEM;
@@ -553,29 +593,35 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
}
break;
}
+ case ARG_TEST_NAME_GLOB_ALLOWLIST:
case ARG_TEST_NAME: {
char *subtest_str = strchr(arg, '/');
if (subtest_str) {
*subtest_str = '\0';
if (parse_str_list(subtest_str + 1,
- &env->subtest_selector.whitelist))
+ &env->subtest_selector.whitelist,
+ key == ARG_TEST_NAME_GLOB_ALLOWLIST))
return -ENOMEM;
}
- if (parse_str_list(arg, &env->test_selector.whitelist))
+ if (parse_str_list(arg, &env->test_selector.whitelist,
+ key == ARG_TEST_NAME_GLOB_ALLOWLIST))
return -ENOMEM;
break;
}
+ case ARG_TEST_NAME_GLOB_DENYLIST:
case ARG_TEST_NAME_BLACKLIST: {
char *subtest_str = strchr(arg, '/');
if (subtest_str) {
*subtest_str = '\0';
if (parse_str_list(subtest_str + 1,
- &env->subtest_selector.blacklist))
+ &env->subtest_selector.blacklist,
+ key == ARG_TEST_NAME_GLOB_DENYLIST))
return -ENOMEM;
}
- if (parse_str_list(arg, &env->test_selector.blacklist))
+ if (parse_str_list(arg, &env->test_selector.blacklist,
+ key == ARG_TEST_NAME_GLOB_DENYLIST))
return -ENOMEM;
break;
}
@@ -755,7 +801,7 @@ int main(int argc, char **argv)
save_netns();
stdio_hijack();
env.has_testmod = true;
- if (load_bpf_testmod()) {
+ if (!env.list_test_names && load_bpf_testmod()) {
fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
env.has_testmod = false;
}
@@ -786,24 +832,25 @@ int main(int argc, char **argv)
test__end_subtest();
test->tested = true;
- if (test->error_cnt)
- env.fail_cnt++;
- else
- env.succ_cnt++;
- skip_account();
dump_test_log(test, test->error_cnt);
fprintf(env.stdout, "#%d %s:%s\n",
test->test_num, test->test_name,
- test->error_cnt ? "FAIL" : "OK");
+ test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+
+ if (test->error_cnt)
+ env.fail_cnt++;
+ else
+ env.succ_cnt++;
+ skip_account();
reset_affinity();
restore_netns();
if (test->need_cgroup_cleanup)
cleanup_cgroup_environment();
}
- if (env.has_testmod)
+ if (!env.list_test_names && env.has_testmod)
unload_bpf_testmod();
stdio_restore();
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 8ef7f334e715..c8c2bf878f67 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -221,6 +221,18 @@ extern int test__join_cgroup(const char *path);
___ok; \
})
+#define ASSERT_STRNEQ(actual, expected, len, name) ({ \
+ static int duration = 0; \
+ const char *___act = actual; \
+ const char *___exp = expected; \
+ int ___len = len; \
+ bool ___ok = strncmp(___act, ___exp, ___len) == 0; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual '%.*s' != expected '%.*s'\n", \
+ (name), ___len, ___act, ___len, ___exp); \
+ ___ok; \
+})
+
#define ASSERT_OK(res, name) ({ \
static int duration = 0; \
long long ___res = (res); \
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index c9dde9b9d987..088fcad138c9 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -69,7 +69,7 @@ cleanup() {
}
server_listen() {
- ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" &
+ ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" &
server_pid=$!
sleep 0.2
}
diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
index ba8ffcdaac30..995278e684b6 100755
--- a/tools/testing/selftests/bpf/test_xdp_veth.sh
+++ b/tools/testing/selftests/bpf/test_xdp_veth.sh
@@ -108,7 +108,7 @@ ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
-ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx
+ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp
ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
trap cleanup EXIT
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 46633a3bfb0b..cd7bf32e6a17 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -63,14 +63,11 @@
# ----------------
# Must run with CAP_NET_ADMIN capability.
#
-# Run (full color-coded output):
-# sudo ./test_xsk.sh -c
+# Run:
+# sudo ./test_xsk.sh
#
# If running from kselftests:
-# sudo make colorconsole=1 run_tests
-#
-# Run (full output without color-coding):
-# sudo ./test_xsk.sh
+# sudo make run_tests
#
# Run with verbose output:
# sudo ./test_xsk.sh -v
@@ -83,7 +80,6 @@
while getopts "cvD" flag
do
case "${flag}" in
- c) colorconsole=1;;
v) verbose=1;;
D) dump_pkts=1;;
esac
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 1bbd1d9830c8..e7a19b04d4ea 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -136,3 +136,90 @@ void read_trace_pipe(void)
}
}
}
+
+#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
+
+#define OP_RT_RA_MASK 0xffff0000UL
+#define LIS_R2 0x3c400000UL
+#define ADDIS_R2_R12 0x3c4c0000UL
+#define ADDI_R2_R2 0x38420000UL
+
+ssize_t get_uprobe_offset(const void *addr, ssize_t base)
+{
+ u32 *insn = (u32 *)(uintptr_t)addr;
+
+ /*
+ * A PPC64 ABIv2 function may have a local and a global entry
+ * point. We need to use the local entry point when patching
+ * functions, so identify and step over the global entry point
+ * sequence.
+ *
+ * The global entry point sequence is always of the form:
+ *
+ * addis r2,r12,XXXX
+ * addi r2,r2,XXXX
+ *
+ * A linker optimisation may convert the addis to lis:
+ *
+ * lis r2,XXXX
+ * addi r2,r2,XXXX
+ */
+ if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+ ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+ ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+ return (ssize_t)(insn + 2) - base;
+ else
+ return (uintptr_t)addr - base;
+}
+
+#else
+
+ssize_t get_uprobe_offset(const void *addr, ssize_t base)
+{
+ return (uintptr_t)addr - base;
+}
+
+#endif
+
+ssize_t get_base_addr(void)
+{
+ size_t start, offset;
+ char buf[256];
+ FILE *f;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -errno;
+
+ while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
+ &start, buf, &offset) == 3) {
+ if (strcmp(buf, "r-xp") == 0) {
+ fclose(f);
+ return start - offset;
+ }
+ }
+
+ fclose(f);
+ return -EINVAL;
+}
+
+ssize_t get_rel_offset(uintptr_t addr)
+{
+ size_t start, end, offset;
+ char buf[256];
+ FILE *f;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -errno;
+
+ while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &offset) == 4) {
+ if (addr >= start && addr < end) {
+ fclose(f);
+ return (size_t)addr - start + offset;
+ }
+ }
+
+ fclose(f);
+ return -EINVAL;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index f62fdef9e589..d907b445524d 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -18,4 +18,8 @@ int kallsyms_find(const char *sym, unsigned long long *addr);
void read_trace_pipe(void);
+ssize_t get_uprobe_offset(const void *addr, ssize_t base);
+ssize_t get_base_addr(void);
+ssize_t get_rel_offset(uintptr_t addr);
+
#endif
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 1135fb980814..f53ce2683f8d 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -70,7 +70,6 @@
#include <errno.h>
#include <getopt.h>
#include <asm/barrier.h>
-typedef __u16 __sum16;
#include <linux/if_link.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
@@ -106,14 +105,9 @@ static const u16 UDP_PORT2 = 2121;
static void __exit_with_error(int error, const char *file, const char *func, int line)
{
- if (configured_mode == TEST_MODE_UNCONFIGURED) {
- ksft_exit_fail_msg
- ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
- } else {
- ksft_test_result_fail
- ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
- ksft_exit_xfail();
- }
+ ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
+ strerror(error));
+ ksft_exit_xfail();
}
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
@@ -126,7 +120,7 @@ static void __exit_with_error(int error, const char *file, const char *func, int
test_type == TEST_TYPE_STATS ? "Stats" : "",\
test_type == TEST_TYPE_BPF_RES ? "BPF RES" : ""))
-static void *memset32_htonl(void *dest, u32 val, u32 size)
+static void memset32_htonl(void *dest, u32 val, u32 size)
{
u32 *ptr = (u32 *)dest;
int i;
@@ -135,11 +129,6 @@ static void *memset32_htonl(void *dest, u32 val, u32 size)
for (i = 0; i < (size & (~0x3)); i += 4)
ptr[i >> 2] = val;
-
- for (; i < size; i++)
- ((char *)dest)[i] = ((char *)&val)[i & 3];
-
- return dest;
}
/*
@@ -230,13 +219,13 @@ static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr)
ip_hdr->check = 0;
}
-static void gen_udp_hdr(struct generic_data *data, struct ifobject *ifobject,
+static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject,
struct udphdr *udp_hdr)
{
udp_hdr->source = htons(ifobject->src_port);
udp_hdr->dest = htons(ifobject->dst_port);
udp_hdr->len = htons(UDP_PKT_SIZE);
- memset32_htonl(pkt_data + PKT_HDR_SIZE, htonl(data->seqnum), UDP_PKT_DATA_SIZE);
+ memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE);
}
static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
@@ -246,12 +235,7 @@ static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
}
-static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
-{
- memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE);
-}
-
-static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
+static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size, int idx)
{
struct xsk_umem_config cfg = {
.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
@@ -260,7 +244,6 @@ static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
.frame_headroom = frame_headroom,
.flags = XSK_UMEM__DEFAULT_FLAGS
};
- int size = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
struct xsk_umem_info *umem;
int ret;
@@ -271,7 +254,7 @@ static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
ret = xsk_umem__create(&umem->umem, buffer, size,
&umem->fq, &umem->cq, &cfg);
if (ret)
- exit_with_error(ret);
+ exit_with_error(-ret);
umem->buffer = buffer;
@@ -285,7 +268,7 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
- exit_with_error(ret);
+ exit_with_error(-ret);
for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
*xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
@@ -333,20 +316,19 @@ static struct option long_options[] = {
{"queue", optional_argument, 0, 'q'},
{"dump-pkts", optional_argument, 0, 'D'},
{"verbose", no_argument, 0, 'v'},
- {"tx-pkt-count", optional_argument, 0, 'C'},
{0, 0, 0, 0}
};
static void usage(const char *prog)
{
const char *str =
- " Usage: %s [OPTIONS]\n"
- " Options:\n"
- " -i, --interface Use interface\n"
- " -q, --queue=n Use queue n (default 0)\n"
- " -D, --dump-pkts Dump packets L2 - L5\n"
- " -v, --verbose Verbose output\n"
- " -C, --tx-pkt-count=n Number of packets to send\n";
+ " Usage: %s [OPTIONS]\n"
+ " Options:\n"
+ " -i, --interface Use interface\n"
+ " -q, --queue=n Use queue n (default 0)\n"
+ " -D, --dump-pkts Dump packets L2 - L5\n"
+ " -v, --verbose Verbose output\n";
+
ksft_print_msg(str, prog);
}
@@ -392,7 +374,7 @@ static void parse_command_line(int argc, char **argv)
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "i:DC:v", long_options, &option_index);
+ c = getopt_long(argc, argv, "i:Dv", long_options, &option_index);
if (c == -1)
break;
@@ -413,13 +395,10 @@ static void parse_command_line(int argc, char **argv)
interface_index++;
break;
case 'D':
- debug_pkt_dump = 1;
- break;
- case 'C':
- opt_pkt_count = atoi(optarg);
+ opt_pkt_dump = true;
break;
case 'v':
- opt_verbose = 1;
+ opt_verbose = true;
break;
default:
usage(basename(argv[0]));
@@ -427,17 +406,143 @@ static void parse_command_line(int argc, char **argv)
}
}
- if (!opt_pkt_count) {
- print_verbose("No tx-pkt-count specified, using default %u\n", DEFAULT_PKT_CNT);
- opt_pkt_count = DEFAULT_PKT_CNT;
- }
-
if (!validate_interfaces()) {
usage(basename(argv[0]));
ksft_exit_xfail();
}
}
+static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
+{
+ if (pkt_nb >= pkt_stream->nb_pkts)
+ return NULL;
+
+ return &pkt_stream->pkts[pkt_nb];
+}
+
+static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ pkt_stream = malloc(sizeof(*pkt_stream));
+ if (!pkt_stream)
+ exit_with_error(ENOMEM);
+
+ pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
+ if (!pkt_stream->pkts)
+ exit_with_error(ENOMEM);
+
+ pkt_stream->nb_pkts = nb_pkts;
+ for (i = 0; i < nb_pkts; i++) {
+ pkt_stream->pkts[i].addr = (i % num_frames) * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ pkt_stream->pkts[i].len = pkt_len;
+ pkt_stream->pkts[i].payload = i;
+ }
+
+ return pkt_stream;
+}
+
+static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
+{
+ struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb);
+ struct udphdr *udp_hdr;
+ struct ethhdr *eth_hdr;
+ struct iphdr *ip_hdr;
+ void *data;
+
+ if (!pkt)
+ return NULL;
+
+ data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr);
+ udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr));
+ ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr));
+ eth_hdr = (struct ethhdr *)data;
+
+ gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr);
+ gen_ip_hdr(ifobject, ip_hdr);
+ gen_udp_csum(udp_hdr, ip_hdr);
+ gen_eth_hdr(ifobject, eth_hdr);
+
+ return pkt;
+}
+
+static void pkt_dump(void *pkt, u32 len)
+{
+ char s[INET_ADDRSTRLEN];
+ struct ethhdr *ethhdr;
+ struct udphdr *udphdr;
+ struct iphdr *iphdr;
+ int payload, i;
+
+ ethhdr = pkt;
+ iphdr = pkt + sizeof(*ethhdr);
+ udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr);
+
+ /*extract L2 frame */
+ fprintf(stdout, "DEBUG>> L2: dst mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ fprintf(stdout, "%02X", ethhdr->h_dest[i]);
+
+ fprintf(stdout, "\nDEBUG>> L2: src mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ fprintf(stdout, "%02X", ethhdr->h_source[i]);
+
+ /*extract L3 frame */
+ fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
+ inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
+ inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
+ /*extract L4 frame */
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
+ /*extract L5 frame */
+ payload = *((uint32_t *)(pkt + PKT_HDR_SIZE));
+
+ fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
+ fprintf(stdout, "---------------------------------------\n");
+}
+
+static bool is_pkt_valid(struct pkt *pkt, void *buffer, const struct xdp_desc *desc)
+{
+ void *data = xsk_umem__get_data(buffer, desc->addr);
+ struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr));
+
+ if (!pkt) {
+ ksft_test_result_fail("ERROR: [%s] too many packets received\n", __func__);
+ return false;
+ }
+
+ if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
+ u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
+
+ if (opt_pkt_dump && test_type != TEST_TYPE_STATS)
+ pkt_dump(data, PKT_SIZE);
+
+ if (pkt->len != desc->len) {
+ ksft_test_result_fail
+ ("ERROR: [%s] expected length [%d], got length [%d]\n",
+ __func__, pkt->len, desc->len);
+ return false;
+ }
+
+ if (pkt->payload != seqnum) {
+ ksft_test_result_fail
+ ("ERROR: [%s] expected seqnum [%d], got seqnum [%d]\n",
+ __func__, pkt->payload, seqnum);
+ return false;
+ }
+ } else {
+ ksft_print_msg("Invalid frame received: ");
+ ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
+ iphdr->tos);
+ return false;
+ }
+
+ return true;
+}
+
static void kick_tx(struct xsk_socket_info *xsk)
{
int ret;
@@ -448,7 +553,7 @@ static void kick_tx(struct xsk_socket_info *xsk)
exit_with_error(errno);
}
-static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
+static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
{
unsigned int rcvd;
u32 idx;
@@ -463,133 +568,108 @@ static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
if (rcvd) {
xsk_ring_cons__release(&xsk->umem->cq, rcvd);
xsk->outstanding_tx -= rcvd;
- xsk->tx_npkts += rcvd;
}
}
-static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds)
+static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *xsk,
+ struct pollfd *fds)
{
- unsigned int rcvd, i;
- u32 idx_rx = 0, idx_fq = 0;
+ u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkt_count = 0;
+ struct pkt *pkt;
int ret;
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
- if (!rcvd) {
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
- ret = poll(fds, 1, POLL_TMOUT);
- if (ret < 0)
- exit_with_error(ret);
+ pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
+ while (pkt) {
+ rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ if (!rcvd) {
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ ret = poll(fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ exit_with_error(-ret);
+ }
+ continue;
}
- return;
- }
- ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
- while (ret != rcvd) {
- if (ret < 0)
- exit_with_error(ret);
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
- ret = poll(fds, 1, POLL_TMOUT);
+ ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+ while (ret != rcvd) {
if (ret < 0)
- exit_with_error(ret);
+ exit_with_error(-ret);
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ ret = poll(fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ exit_with_error(-ret);
+ }
+ ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
}
- ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
- }
-
- for (i = 0; i < rcvd; i++) {
- u64 addr, orig;
-
- addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
- xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
- orig = xsk_umem__extract_addr(addr);
- addr = xsk_umem__add_offset_to_addr(addr);
- pkt_node_rx = malloc(sizeof(struct pkt) + PKT_SIZE);
- if (!pkt_node_rx)
- exit_with_error(errno);
+ for (i = 0; i < rcvd; i++) {
+ const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
+ u64 addr = desc->addr, orig;
- pkt_node_rx->pkt_frame = malloc(PKT_SIZE);
- if (!pkt_node_rx->pkt_frame)
- exit_with_error(errno);
+ orig = xsk_umem__extract_addr(addr);
+ addr = xsk_umem__add_offset_to_addr(addr);
+ if (!is_pkt_valid(pkt, xsk->umem->buffer, desc))
+ return;
- memcpy(pkt_node_rx->pkt_frame, xsk_umem__get_data(xsk->umem->buffer, addr),
- PKT_SIZE);
-
- TAILQ_INSERT_HEAD(&head, pkt_node_rx, pkt_nodes);
+ *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
+ pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
+ }
- *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
+ xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
+ xsk_ring_cons__release(&xsk->rx, rcvd);
}
-
- xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
- xsk_ring_cons__release(&xsk->rx, rcvd);
- xsk->rx_npkts += rcvd;
}
-static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
+static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
{
- u32 idx = 0;
- unsigned int i;
- bool tx_invalid_test = stat_test_type == STAT_TEST_TX_INVALID;
- u32 len = tx_invalid_test ? XSK_UMEM__DEFAULT_FRAME_SIZE + 1 : PKT_SIZE;
+ struct xsk_socket_info *xsk = ifobject->xsk;
+ u32 i, idx;
- while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size)
- complete_tx_only(xsk, batch_size);
+ while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE)
+ complete_pkts(xsk, BATCH_SIZE);
- for (i = 0; i < batch_size; i++) {
+ for (i = 0; i < BATCH_SIZE; i++) {
struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+ struct pkt *pkt = pkt_generate(ifobject, pkt_nb);
- tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
- tx_desc->len = len;
- }
+ if (!pkt)
+ break;
- xsk_ring_prod__submit(&xsk->tx, batch_size);
- if (!tx_invalid_test) {
- xsk->outstanding_tx += batch_size;
- } else if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
- kick_tx(xsk);
+ tx_desc->addr = pkt->addr;
+ tx_desc->len = pkt->len;
+ pkt_nb++;
}
- *frameptr += batch_size;
- *frameptr %= num_frames;
- complete_tx_only(xsk, batch_size);
-}
-
-static int get_batch_size(int pkt_cnt)
-{
- if (!opt_pkt_count)
- return BATCH_SIZE;
- if (pkt_cnt + BATCH_SIZE <= opt_pkt_count)
- return BATCH_SIZE;
+ xsk_ring_prod__submit(&xsk->tx, i);
+ if (stat_test_type != STAT_TEST_TX_INVALID)
+ xsk->outstanding_tx += i;
+ else if (xsk_ring_prod__needs_wakeup(&xsk->tx))
+ kick_tx(xsk);
+ complete_pkts(xsk, i);
- return opt_pkt_count - pkt_cnt;
+ return i;
}
-static void complete_tx_only_all(struct ifobject *ifobject)
+static void wait_for_tx_completion(struct xsk_socket_info *xsk)
{
- bool pending;
-
- do {
- pending = false;
- if (ifobject->xsk->outstanding_tx) {
- complete_tx_only(ifobject->xsk, BATCH_SIZE);
- pending = !!ifobject->xsk->outstanding_tx;
- }
- } while (pending);
+ while (xsk->outstanding_tx)
+ complete_pkts(xsk, BATCH_SIZE);
}
-static void tx_only_all(struct ifobject *ifobject)
+static void send_pkts(struct ifobject *ifobject)
{
struct pollfd fds[MAX_SOCKS] = { };
- u32 frame_nb = 0;
- int pkt_cnt = 0;
- int ret;
+ u32 pkt_cnt = 0;
fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
fds[0].events = POLLOUT;
- while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
- int batch_size = get_batch_size(pkt_cnt);
+ while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
+ u32 sent;
if (test_type == TEST_TYPE_POLL) {
+ int ret;
+
ret = poll(fds, 1, POLL_TMOUT);
if (ret <= 0)
continue;
@@ -598,78 +678,30 @@ static void tx_only_all(struct ifobject *ifobject)
continue;
}
- tx_only(ifobject->xsk, &frame_nb, batch_size);
- pkt_cnt += batch_size;
+ sent = __send_pkts(ifobject, pkt_cnt);
+ pkt_cnt += sent;
+ usleep(10);
}
- if (opt_pkt_count)
- complete_tx_only_all(ifobject);
+ wait_for_tx_completion(ifobject->xsk);
}
-static void worker_pkt_dump(void)
-{
- struct ethhdr *ethhdr;
- struct iphdr *iphdr;
- struct udphdr *udphdr;
- char s[128];
- int payload;
- void *ptr;
-
- fprintf(stdout, "---------------------------------------\n");
- for (int iter = 0; iter < num_frames - 1; iter++) {
- ptr = pkt_buf[iter]->payload;
- ethhdr = ptr;
- iphdr = ptr + sizeof(*ethhdr);
- udphdr = ptr + sizeof(*ethhdr) + sizeof(*iphdr);
-
- /*extract L2 frame */
- fprintf(stdout, "DEBUG>> L2: dst mac: ");
- for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ethhdr->h_dest[i]);
-
- fprintf(stdout, "\nDEBUG>> L2: src mac: ");
- for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ethhdr->h_source[i]);
-
- /*extract L3 frame */
- fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
- fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
- inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
- fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
- inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
- /*extract L4 frame */
- fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
- fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
- /*extract L5 frame */
- payload = *((uint32_t *)(ptr + PKT_HDR_SIZE));
-
- if (payload == EOT) {
- print_verbose("End-of-transmission frame received\n");
- fprintf(stdout, "---------------------------------------\n");
- break;
- }
- fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
- fprintf(stdout, "---------------------------------------\n");
- }
-}
-
-static void worker_stats_validate(struct ifobject *ifobject)
+static bool rx_stats_are_valid(struct ifobject *ifobject)
{
+ u32 xsk_stat = 0, expected_stat = ifobject->pkt_stream->nb_pkts;
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ int fd = xsk_socket__fd(xsk);
struct xdp_statistics stats;
socklen_t optlen;
int err;
- struct xsk_socket *xsk = stat_test_type == STAT_TEST_TX_INVALID ?
- ifdict[!ifobject->ifdict_index]->xsk->xsk :
- ifobject->xsk->xsk;
- int fd = xsk_socket__fd(xsk);
- unsigned long xsk_stat = 0, expected_stat = opt_pkt_count;
-
- sigvar = 0;
optlen = sizeof(stats);
err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
- if (err)
- return;
+ if (err) {
+ ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return true;
+ }
if (optlen == sizeof(struct xdp_statistics)) {
switch (stat_test_type) {
@@ -677,8 +709,7 @@ static void worker_stats_validate(struct ifobject *ifobject)
xsk_stat = stats.rx_dropped;
break;
case STAT_TEST_TX_INVALID:
- xsk_stat = stats.tx_invalid_descs;
- break;
+ return true;
case STAT_TEST_RX_FULL:
xsk_stat = stats.rx_ring_full;
expected_stat -= RX_FULL_RXQSIZE;
@@ -691,99 +722,70 @@ static void worker_stats_validate(struct ifobject *ifobject)
}
if (xsk_stat == expected_stat)
- sigvar = 1;
+ return true;
}
+
+ return false;
}
-static void worker_pkt_validate(void)
+static void tx_stats_validate(struct ifobject *ifobject)
{
- u32 payloadseqnum = -2;
- struct iphdr *iphdr;
-
- while (1) {
- pkt_node_rx_q = TAILQ_LAST(&head, head_s);
- if (!pkt_node_rx_q)
- break;
-
- iphdr = (struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr));
-
- /*do not increment pktcounter if !(tos=0x9 and ipv4) */
- if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
- payloadseqnum = *((uint32_t *)(pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE));
- if (debug_pkt_dump && payloadseqnum != EOT) {
- pkt_obj = malloc(sizeof(*pkt_obj));
- pkt_obj->payload = malloc(PKT_SIZE);
- memcpy(pkt_obj->payload, pkt_node_rx_q->pkt_frame, PKT_SIZE);
- pkt_buf[payloadseqnum] = pkt_obj;
- }
-
- if (payloadseqnum == EOT) {
- print_verbose("End-of-transmission frame received: PASS\n");
- sigvar = 1;
- break;
- }
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ int fd = xsk_socket__fd(xsk);
+ struct xdp_statistics stats;
+ socklen_t optlen;
+ int err;
- if (prev_pkt + 1 != payloadseqnum) {
- ksft_test_result_fail
- ("ERROR: [%s] prev_pkt [%d], payloadseqnum [%d]\n",
- __func__, prev_pkt, payloadseqnum);
- ksft_exit_xfail();
- }
+ optlen = sizeof(stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+ if (err) {
+ ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return;
+ }
- prev_pkt = payloadseqnum;
- pkt_counter++;
- } else {
- ksft_print_msg("Invalid frame received: ");
- ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
- iphdr->tos);
- }
+ if (stats.tx_invalid_descs == ifobject->pkt_stream->nb_pkts)
+ return;
- TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes);
- free(pkt_node_rx_q->pkt_frame);
- free(pkt_node_rx_q);
- pkt_node_rx_q = NULL;
- }
+ ksft_test_result_fail("ERROR: [%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
+ __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts);
}
static void thread_common_ops(struct ifobject *ifobject, void *bufs)
{
- int umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ u64 umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+ size_t mmap_sz = umem_sz;
int ctr = 0;
int ret;
ifobject->ns_fd = switch_namespace(ifobject->nsname);
if (test_type == TEST_TYPE_BPF_RES)
- umem_sz *= 2;
+ mmap_sz *= 2;
- bufs = mmap(NULL, umem_sz,
- PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
if (bufs == MAP_FAILED)
exit_with_error(errno);
- xsk_configure_umem(ifobject, bufs, 0);
- ifobject->umem = ifobject->umem_arr[0];
- ret = xsk_configure_socket(ifobject, 0);
-
- /* Retry Create Socket if it fails as xsk_socket__create()
- * is asynchronous
- */
- while (ret && ctr < SOCK_RECONF_CTR) {
- xsk_configure_umem(ifobject, bufs, 0);
+ while (ctr++ < SOCK_RECONF_CTR) {
+ xsk_configure_umem(ifobject, bufs, umem_sz, 0);
ifobject->umem = ifobject->umem_arr[0];
ret = xsk_configure_socket(ifobject, 0);
+ if (!ret)
+ break;
+
+ /* Retry Create Socket if it fails as xsk_socket__create() is asynchronous */
usleep(USLEEP_MAX);
- ctr++;
+ if (ctr >= SOCK_RECONF_CTR)
+ exit_with_error(-ret);
}
- if (ctr >= SOCK_RECONF_CTR)
- exit_with_error(ret);
-
ifobject->umem = ifobject->umem_arr[0];
ifobject->xsk = ifobject->xsk_arr[0];
if (test_type == TEST_TYPE_BPF_RES) {
- xsk_configure_umem(ifobject, (u8 *)bufs + (umem_sz / 2), 1);
+ xsk_configure_umem(ifobject, (u8 *)bufs + umem_sz, umem_sz, 1);
ifobject->umem = ifobject->umem_arr[1];
ret = xsk_configure_socket(ifobject, 1);
}
@@ -809,33 +811,18 @@ static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
static void *worker_testapp_validate_tx(void *arg)
{
- struct udphdr *udp_hdr =
- (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr));
- struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr));
- struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
struct ifobject *ifobject = (struct ifobject *)arg;
- struct generic_data data;
void *bufs = NULL;
if (!second_step)
thread_common_ops(ifobject, bufs);
- for (int i = 0; i < num_frames; i++) {
- /*send EOT frame */
- if (i == (num_frames - 1))
- data.seqnum = -1;
- else
- data.seqnum = i;
- gen_udp_hdr(&data, ifobject, udp_hdr);
- gen_ip_hdr(ifobject, ip_hdr);
- gen_udp_csum(udp_hdr, ip_hdr);
- gen_eth_hdr(ifobject, eth_hdr);
- gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
- }
+ print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
+ ifobject->ifname);
+ send_pkts(ifobject);
- print_verbose("Sending %d packets on interface %s\n",
- (opt_pkt_count - 1), ifobject->ifname);
- tx_only_all(ifobject);
+ if (stat_test_type == STAT_TEST_TX_INVALID)
+ tx_stats_validate(ifobject);
testapp_cleanup_xsk_res(ifobject);
pthread_exit(NULL);
@@ -853,31 +840,16 @@ static void *worker_testapp_validate_rx(void *arg)
if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
xsk_populate_fill_ring(ifobject->umem);
- TAILQ_INIT(&head);
- if (debug_pkt_dump) {
- pkt_buf = calloc(num_frames, sizeof(*pkt_buf));
- if (!pkt_buf)
- exit_with_error(errno);
- }
-
fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
fds[0].events = POLLIN;
pthread_barrier_wait(&barr);
- while (1) {
- if (test_type != TEST_TYPE_STATS) {
- rx_pkt(ifobject->xsk, fds);
- worker_pkt_validate();
- } else {
- worker_stats_validate(ifobject);
- }
- if (sigvar)
- break;
- }
-
- print_verbose("Received %d packets on interface %s\n",
- pkt_counter, ifobject->ifname);
+ if (test_type == TEST_TYPE_STATS)
+ while (!rx_stats_are_valid(ifobject))
+ continue;
+ else
+ receive_pkts(ifobject->pkt_stream, ifobject->xsk, fds);
if (test_type == TEST_TYPE_TEARDOWN)
print_verbose("Destroying socket\n");
@@ -890,10 +862,18 @@ static void testapp_validate(void)
{
bool bidi = test_type == TEST_TYPE_BIDI;
bool bpf = test_type == TEST_TYPE_BPF_RES;
+ struct pkt_stream *pkt_stream;
if (pthread_barrier_init(&barr, NULL, 2))
exit_with_error(errno);
+ if (stat_test_type == STAT_TEST_TX_INVALID)
+ pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
+ else
+ pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, PKT_SIZE);
+ ifdict_tx->pkt_stream = pkt_stream;
+ ifdict_rx->pkt_stream = pkt_stream;
+
/*Spawn RX thread */
pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx);
@@ -907,15 +887,6 @@ static void testapp_validate(void)
pthread_join(t1, NULL);
pthread_join(t0, NULL);
- if (debug_pkt_dump && test_type != TEST_TYPE_STATS) {
- worker_pkt_dump();
- for (int iter = 0; iter < num_frames - 1; iter++) {
- free(pkt_buf[iter]->payload);
- free(pkt_buf[iter]);
- }
- free(pkt_buf);
- }
-
if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS))
print_ksft_result();
}
@@ -925,9 +896,6 @@ static void testapp_teardown(void)
int i;
for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
- pkt_counter = 0;
- prev_pkt = -1;
- sigvar = 0;
print_verbose("Creating socket\n");
testapp_validate();
}
@@ -953,9 +921,6 @@ static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2)
static void testapp_bidi(void)
{
for (int i = 0; i < MAX_BIDI_ITER; i++) {
- pkt_counter = 0;
- prev_pkt = -1;
- sigvar = 0;
print_verbose("Creating socket\n");
testapp_validate();
if (!second_step) {
@@ -987,9 +952,6 @@ static void testapp_bpf_res(void)
int i;
for (i = 0; i < MAX_BPF_ITER; i++) {
- pkt_counter = 0;
- prev_pkt = -1;
- sigvar = 0;
print_verbose("Creating socket\n");
testapp_validate();
if (!second_step)
@@ -1017,6 +979,8 @@ static void testapp_stats(void)
case STAT_TEST_RX_FULL:
rxqsize = RX_FULL_RXQSIZE;
break;
+ case STAT_TEST_TX_INVALID:
+ continue;
default:
break;
}
@@ -1062,10 +1026,7 @@ static void run_pkt_test(int mode, int type)
/* reset defaults after potential previous test */
xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
- pkt_counter = 0;
second_step = 0;
- prev_pkt = -1;
- sigvar = 0;
stat_test_type = -1;
rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
@@ -1102,62 +1063,70 @@ static void run_pkt_test(int mode, int type)
}
}
+static struct ifobject *ifobject_create(void)
+{
+ struct ifobject *ifobj;
+
+ ifobj = calloc(1, sizeof(struct ifobject));
+ if (!ifobj)
+ return NULL;
+
+ ifobj->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
+ if (!ifobj->xsk_arr)
+ goto out_xsk_arr;
+
+ ifobj->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
+ if (!ifobj->umem_arr)
+ goto out_umem_arr;
+
+ return ifobj;
+
+out_umem_arr:
+ free(ifobj->xsk_arr);
+out_xsk_arr:
+ free(ifobj);
+ return NULL;
+}
+
+static void ifobject_delete(struct ifobject *ifobj)
+{
+ free(ifobj->umem_arr);
+ free(ifobj->xsk_arr);
+ free(ifobj);
+}
+
int main(int argc, char **argv)
{
struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
- bool failure = false;
int i, j;
if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
exit_with_error(errno);
- for (int i = 0; i < MAX_INTERFACES; i++) {
- ifdict[i] = malloc(sizeof(struct ifobject));
+ for (i = 0; i < MAX_INTERFACES; i++) {
+ ifdict[i] = ifobject_create();
if (!ifdict[i])
- exit_with_error(errno);
-
- ifdict[i]->ifdict_index = i;
- ifdict[i]->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
- if (!ifdict[i]->xsk_arr) {
- failure = true;
- goto cleanup;
- }
- ifdict[i]->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
- if (!ifdict[i]->umem_arr) {
- failure = true;
- goto cleanup;
- }
+ exit_with_error(ENOMEM);
}
setlocale(LC_ALL, "");
parse_command_line(argc, argv);
- num_frames = ++opt_pkt_count;
-
- init_iface(ifdict[0], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
- init_iface(ifdict[1], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
+ init_iface(ifdict[tx], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
+ init_iface(ifdict[rx], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
- for (i = 0; i < TEST_MODE_MAX; i++) {
- for (j = 0; j < TEST_TYPE_MAX; j++)
+ for (i = 0; i < TEST_MODE_MAX; i++)
+ for (j = 0; j < TEST_TYPE_MAX; j++) {
run_pkt_test(i, j);
- }
-
-cleanup:
- for (int i = 0; i < MAX_INTERFACES; i++) {
- if (ifdict[i]->ns_fd != -1)
- close(ifdict[i]->ns_fd);
- free(ifdict[i]->xsk_arr);
- free(ifdict[i]->umem_arr);
- free(ifdict[i]);
- }
+ usleep(USLEEP_MAX);
+ }
- if (failure)
- exit_with_error(errno);
+ for (i = 0; i < MAX_INTERFACES; i++)
+ ifobject_delete(ifdict[i]);
ksft_exit_pass();
-
return 0;
}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 6c428b276ab6..7e49b9fbe25e 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -34,28 +34,23 @@
#define IP_PKT_TOS 0x9
#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
-#define EOT (-1)
-#define USLEEP_MAX 200000
+#define USLEEP_MAX 10000
#define SOCK_RECONF_CTR 10
-#define BATCH_SIZE 64
+#define BATCH_SIZE 8
#define POLL_TMOUT 1000
-#define DEFAULT_PKT_CNT 10000
+#define DEFAULT_PKT_CNT (4 * 1024)
#define RX_FULL_RXQSIZE 32
+#define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
-typedef __u32 u32;
-typedef __u16 u16;
-typedef __u8 u8;
-
-enum TEST_MODES {
- TEST_MODE_UNCONFIGURED = -1,
+enum test_mode {
TEST_MODE_SKB,
TEST_MODE_DRV,
TEST_MODE_MAX
};
-enum TEST_TYPES {
+enum test_type {
TEST_TYPE_NOPOLL,
TEST_TYPE_POLL,
TEST_TYPE_TEARDOWN,
@@ -65,7 +60,7 @@ enum TEST_TYPES {
TEST_TYPE_MAX
};
-enum STAT_TEST_TYPES {
+enum stat_test_type {
STAT_TEST_RX_DROPPED,
STAT_TEST_TX_INVALID,
STAT_TEST_RX_FULL,
@@ -73,21 +68,16 @@ enum STAT_TEST_TYPES {
STAT_TEST_TYPE_MAX
};
-static int configured_mode = TEST_MODE_UNCONFIGURED;
-static u8 debug_pkt_dump;
-static u32 num_frames;
+static int configured_mode;
+static bool opt_pkt_dump;
+static u32 num_frames = DEFAULT_PKT_CNT / 4;
static bool second_step;
static int test_type;
-static int opt_pkt_count;
-static u8 opt_verbose;
+static bool opt_verbose;
static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
-static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
-static u32 pkt_counter;
-static long prev_pkt = -1;
-static int sigvar;
static int stat_test_type;
static u32 rxqsize;
static u32 frame_headroom;
@@ -104,10 +94,6 @@ struct xsk_socket_info {
struct xsk_ring_prod tx;
struct xsk_umem_info *umem;
struct xsk_socket *xsk;
- unsigned long rx_npkts;
- unsigned long tx_npkts;
- unsigned long prev_rx_npkts;
- unsigned long prev_tx_npkts;
u32 outstanding_tx;
};
@@ -118,8 +104,15 @@ struct flow_vector {
} vector;
};
-struct generic_data {
- u32 seqnum;
+struct pkt {
+ u64 addr;
+ u32 len;
+ u32 payload;
+};
+
+struct pkt_stream {
+ u32 nb_pkts;
+ struct pkt *pkts;
};
struct ifobject {
@@ -131,8 +124,8 @@ struct ifobject {
struct xsk_umem_info *umem;
void *(*func_ptr)(void *arg);
struct flow_vector fv;
+ struct pkt_stream *pkt_stream;
int ns_fd;
- int ifdict_index;
u32 dst_ip;
u32 src_ip;
u16 src_port;
@@ -149,18 +142,4 @@ static struct ifobject *ifdict_tx;
pthread_barrier_t barr;
pthread_t t0, t1;
-TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
-struct head_s *head_p;
-struct pkt {
- char *pkt_frame;
-
- TAILQ_ENTRY(pkt) pkt_nodes;
-} *pkt_node_rx, *pkt_node_rx_q;
-
-struct pkt_frame {
- char *payload;
-} *pkt_obj;
-
-struct pkt_frame **pkt_buf;
-
#endif /* XDPXCEIVER_H */
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index dac1c5f78752..bf29d2549bee 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -8,14 +8,8 @@ ksft_xfail=2
ksft_xpass=3
ksft_skip=4
-GREEN='\033[0;92m'
-YELLOW='\033[0;93m'
-RED='\033[0;31m'
-NC='\033[0m'
-STACK_LIM=131072
SPECFILE=veth.spec
XSKOBJ=xdpxceiver
-NUMPKTS=10000
validate_root_exec()
{
@@ -50,22 +44,12 @@ validate_veth_spec_file()
test_status()
{
statusval=$1
- if [ -n "${colorconsole+set}" ]; then
- if [ $statusval -eq 2 ]; then
- echo -e "${YELLOW}$2${NC}: [ ${RED}FAIL${NC} ]"
- elif [ $statusval -eq 1 ]; then
- echo -e "${YELLOW}$2${NC}: [ ${RED}SKIPPED${NC} ]"
- elif [ $statusval -eq 0 ]; then
- echo -e "${YELLOW}$2${NC}: [ ${GREEN}PASS${NC} ]"
- fi
- else
- if [ $statusval -eq 2 ]; then
- echo -e "$2: [ FAIL ]"
- elif [ $statusval -eq 1 ]; then
- echo -e "$2: [ SKIPPED ]"
- elif [ $statusval -eq 0 ]; then
- echo -e "$2: [ PASS ]"
- fi
+ if [ $statusval -eq 2 ]; then
+ echo -e "$2: [ FAIL ]"
+ elif [ $statusval -eq 1 ]; then
+ echo -e "$2: [ SKIPPED ]"
+ elif [ $statusval -eq 0 ]; then
+ echo -e "$2: [ PASS ]"
fi
}
@@ -107,5 +91,5 @@ validate_ip_utility()
execxdpxceiver()
{
- ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} -C ${NUMPKTS} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
+ ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
}
diff --git a/tools/testing/selftests/cpufreq/config b/tools/testing/selftests/cpufreq/config
index 27ff72ebd0f5..75e900793e8a 100644
--- a/tools/testing/selftests/cpufreq/config
+++ b/tools/testing/selftests/cpufreq/config
@@ -6,7 +6,7 @@ CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
CONFIG_DEBUG_RT_MUTEXES=y
-CONFIG_DEBUG_PI_LIST=y
+CONFIG_DEBUG_PLIST=y
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_DEBUG_LOCK_ALLOC=y
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 477cbb042f5b..0315955ff0f4 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -62,6 +62,9 @@ static int __do_binderfs_test(struct __test_metadata *_metadata)
struct binder_version version = { 0 };
char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX",
device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME];
+ static const char * const binder_features[] = {
+ "oneway_spam_detection",
+ };
change_mountns(_metadata);
@@ -150,6 +153,20 @@ static int __do_binderfs_test(struct __test_metadata *_metadata)
}
/* success: binder-control device removal failed as expected */
+
+ for (int i = 0; i < ARRAY_SIZE(binder_features); i++) {
+ snprintf(device_path, sizeof(device_path), "%s/features/%s",
+ binderfs_mntpt, binder_features[i]);
+ fd = open(device_path, O_CLOEXEC | O_RDONLY);
+ EXPECT_GE(fd, 0) {
+ TH_LOG("%s - Failed to open binder feature: %s",
+ strerror(errno), binder_features[i]);
+ goto umount;
+ }
+ close(fd);
+ }
+
+ /* success: binder feature files found */
result = 0;
umount:
diff --git a/tools/testing/selftests/firmware/fw_namespace.c b/tools/testing/selftests/firmware/fw_namespace.c
index 0e393cb5f42d..4c6f0cd83c5b 100644
--- a/tools/testing/selftests/firmware/fw_namespace.c
+++ b/tools/testing/selftests/firmware/fw_namespace.c
@@ -129,7 +129,8 @@ int main(int argc, char **argv)
die("mounting tmpfs to /lib/firmware failed\n");
sys_path = argv[1];
- asprintf(&fw_path, "/lib/firmware/%s", fw_name);
+ if (asprintf(&fw_path, "/lib/firmware/%s", fw_name) < 0)
+ die("error: failed to build full fw_path\n");
setup_fw(fw_path);
diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index 013446e87f1f..38edea25631b 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -6,3 +6,5 @@ CONFIG_HARDENED_USERCOPY=y
# CONFIG_HARDENED_USERCOPY_FALLBACK is not set
CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
+CONFIG_UBSAN_BOUNDS=y
+CONFIG_UBSAN_TRAP=y
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 846cfd508d3c..09f7bfa383cc 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -7,6 +7,7 @@ EXCEPTION
#EXHAUST_STACK Corrupts memory on failure
#CORRUPT_STACK Crashes entire system on success
#CORRUPT_STACK_STRONG Crashes entire system on success
+ARRAY_BOUNDS
CORRUPT_LIST_ADD list_add corruption
CORRUPT_LIST_DEL list_del corruption
STACK_GUARD_PAGE_LEADING
@@ -72,4 +73,6 @@ USERCOPY_KERNEL
STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
CFI_FORWARD_PROTO
FORTIFIED_STRSCPY
+FORTIFIED_OBJECT
+FORTIFIED_SUBOBJECT
PPC_SLB_MULTIHIT Recovered
diff --git a/tools/testing/selftests/move_mount_set_group/.gitignore b/tools/testing/selftests/move_mount_set_group/.gitignore
new file mode 100644
index 000000000000..f5e339268720
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/.gitignore
@@ -0,0 +1 @@
+move_mount_set_group_test
diff --git a/tools/testing/selftests/move_mount_set_group/Makefile b/tools/testing/selftests/move_mount_set_group/Makefile
new file mode 100644
index 000000000000..80c2d86812b0
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for mount selftests.
+CFLAGS = -g -I../../../../usr/include/ -Wall -O2
+
+TEST_GEN_FILES += move_mount_set_group_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/move_mount_set_group/config b/tools/testing/selftests/move_mount_set_group/config
new file mode 100644
index 000000000000..416bd53ce982
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
new file mode 100644
index 000000000000..860198f83a53
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <sys/syscall.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef CLONE_NEWNS
+#define CLONE_NEWNS 0x00020000
+#endif
+
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER 0x10000000
+#endif
+
+#ifndef MS_SHARED
+#define MS_SHARED (1 << 20)
+#endif
+
+#ifndef MS_PRIVATE
+#define MS_PRIVATE (1<<18)
+#endif
+
+#ifndef MOVE_MOUNT_SET_GROUP
+#define MOVE_MOUNT_SET_GROUP 0x00000100
+#endif
+
+#ifndef MOVE_MOUNT_F_EMPTY_PATH
+#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004
+#endif
+
+#ifndef MOVE_MOUNT_T_EMPTY_PATH
+#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040
+#endif
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static int write_file(const char *path, const void *buf, size_t count)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
+ if (fd < 0)
+ return -1;
+
+ ret = write_nointr(fd, buf, count);
+ close(fd);
+ if (ret < 0 || (size_t)ret != count)
+ return -1;
+
+ return 0;
+}
+
+static int create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+ char map[100];
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER))
+ return -1;
+
+ if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
+ errno != ENOENT)
+ return -1;
+
+ snprintf(map, sizeof(map), "0 %d 1", uid);
+ if (write_file("/proc/self/uid_map", map, strlen(map)))
+ return -1;
+
+
+ snprintf(map, sizeof(map), "0 %d 1", gid);
+ if (write_file("/proc/self/gid_map", map, strlen(map)))
+ return -1;
+
+ if (setgid(0))
+ return -1;
+
+ if (setuid(0))
+ return -1;
+
+ return 0;
+}
+
+static int prepare_unpriv_mountns(void)
+{
+ if (create_and_enter_userns())
+ return -1;
+
+ if (unshare(CLONE_NEWNS))
+ return -1;
+
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+ return -1;
+
+ return 0;
+}
+
+static char *get_field(char *src, int nfields)
+{
+ int i;
+ char *p = src;
+
+ for (i = 0; i < nfields; i++) {
+ while (*p && *p != ' ' && *p != '\t')
+ p++;
+
+ if (!*p)
+ break;
+
+ p++;
+ }
+
+ return p;
+}
+
+static void null_endofword(char *word)
+{
+ while (*word && *word != ' ' && *word != '\t')
+ word++;
+ *word = '\0';
+}
+
+static bool is_shared_mount(const char *path)
+{
+ size_t len = 0;
+ char *line = NULL;
+ FILE *f = NULL;
+
+ f = fopen("/proc/self/mountinfo", "re");
+ if (!f)
+ return false;
+
+ while (getline(&line, &len, f) != -1) {
+ char *opts, *target;
+
+ target = get_field(line, 4);
+ if (!target)
+ continue;
+
+ opts = get_field(target, 2);
+ if (!opts)
+ continue;
+
+ null_endofword(target);
+
+ if (strcmp(target, path) != 0)
+ continue;
+
+ null_endofword(opts);
+ if (strstr(opts, "shared:"))
+ return true;
+ }
+
+ free(line);
+ fclose(f);
+
+ return false;
+}
+
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
+
+#define SET_GROUP_FROM "/tmp/move_mount_set_group_supported_from"
+#define SET_GROUP_TO "/tmp/move_mount_set_group_supported_to"
+
+static int move_mount_set_group_supported(void)
+{
+ int ret;
+
+ if (mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"))
+ return -1;
+
+ if (mount(NULL, "/tmp", NULL, MS_PRIVATE, 0))
+ return -1;
+
+ if (mkdir(SET_GROUP_FROM, 0777))
+ return -1;
+
+ if (mkdir(SET_GROUP_TO, 0777))
+ return -1;
+
+ if (mount("testing", SET_GROUP_FROM, "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"))
+ return -1;
+
+ if (mount(SET_GROUP_FROM, SET_GROUP_TO, NULL, MS_BIND, NULL))
+ return -1;
+
+ if (mount(NULL, SET_GROUP_FROM, NULL, MS_SHARED, 0))
+ return -1;
+
+ ret = syscall(SYS_move_mount, AT_FDCWD, SET_GROUP_FROM,
+ AT_FDCWD, SET_GROUP_TO, MOVE_MOUNT_SET_GROUP);
+ umount2("/tmp", MNT_DETACH);
+
+ return ret < 0 ? false : true;
+}
+
+FIXTURE(move_mount_set_group) {
+};
+
+#define SET_GROUP_A "/tmp/A"
+
+FIXTURE_SETUP(move_mount_set_group)
+{
+ int ret;
+
+ ASSERT_EQ(prepare_unpriv_mountns(), 0);
+
+ ret = move_mount_set_group_supported();
+ ASSERT_GE(ret, 0);
+ if (!ret)
+ SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
+
+ umount2("/tmp", MNT_DETACH);
+
+ ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir(SET_GROUP_A, 0777), 0);
+
+ ASSERT_EQ(mount("testing", SET_GROUP_A, "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+}
+
+FIXTURE_TEARDOWN(move_mount_set_group)
+{
+ int ret;
+
+ ret = move_mount_set_group_supported();
+ ASSERT_GE(ret, 0);
+ if (!ret)
+ SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
+
+ umount2("/tmp", MNT_DETACH);
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+ void *stack;
+
+ stack = malloc(__STACK_SIZE);
+ if (!stack)
+ return -ENOMEM;
+
+#ifdef __ia64__
+ return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+ return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+struct child_args {
+ int unsfd;
+ int mntnsfd;
+ bool shared;
+ int mntfd;
+};
+
+static int get_nestedns_mount_cb(void *data)
+{
+ struct child_args *ca = (struct child_args *)data;
+ int ret;
+
+ ret = prepare_unpriv_mountns();
+ if (ret)
+ return 1;
+
+ if (ca->shared) {
+ ret = mount(NULL, SET_GROUP_A, NULL, MS_SHARED, 0);
+ if (ret)
+ return 1;
+ }
+
+ ret = open("/proc/self/ns/user", O_RDONLY);
+ if (ret < 0)
+ return 1;
+ ca->unsfd = ret;
+
+ ret = open("/proc/self/ns/mnt", O_RDONLY);
+ if (ret < 0)
+ return 1;
+ ca->mntnsfd = ret;
+
+ ret = open(SET_GROUP_A, O_RDONLY);
+ if (ret < 0)
+ return 1;
+ ca->mntfd = ret;
+
+ return 0;
+}
+
+TEST_F(move_mount_set_group, complex_sharing_copying)
+{
+ struct child_args ca_from = {
+ .shared = true,
+ };
+ struct child_args ca_to = {
+ .shared = false,
+ };
+ pid_t pid;
+ int ret;
+
+ ret = move_mount_set_group_supported();
+ ASSERT_GE(ret, 0);
+ if (!ret)
+ SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
+
+ pid = do_clone(get_nestedns_mount_cb, (void *)&ca_from, CLONE_VFORK |
+ CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
+ ASSERT_EQ(wait_for_pid(pid), 0);
+
+ pid = do_clone(get_nestedns_mount_cb, (void *)&ca_to, CLONE_VFORK |
+ CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
+ ASSERT_EQ(wait_for_pid(pid), 0);
+
+ ASSERT_EQ(syscall(SYS_move_mount, ca_from.mntfd, "",
+ ca_to.mntfd, "", MOVE_MOUNT_SET_GROUP
+ | MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH),
+ 0);
+
+ ASSERT_EQ(setns(ca_to.mntnsfd, CLONE_NEWNS), 0);
+ ASSERT_EQ(is_shared_mount(SET_GROUP_A), 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c
index 57b505cb1561..e1bf55dabdf6 100644
--- a/tools/testing/selftests/nci/nci_dev.c
+++ b/tools/testing/selftests/nci/nci_dev.c
@@ -57,6 +57,29 @@ const __u8 nci_init_rsp_v2[] = {0x40, 0x01, 0x1c, 0x00, 0x1a, 0x7e, 0x06,
const __u8 nci_rf_disc_map_rsp[] = {0x41, 0x00, 0x01, 0x00};
const __u8 nci_rf_disc_rsp[] = {0x41, 0x03, 0x01, 0x00};
const __u8 nci_rf_deact_rsp[] = {0x41, 0x06, 0x01, 0x00};
+const __u8 nci_rf_deact_ntf[] = {0x61, 0x06, 0x02, 0x00, 0x00};
+const __u8 nci_rf_activate_ntf[] = {0x61, 0x05, 0x1D, 0x01, 0x02, 0x04, 0x00,
+ 0xFF, 0xFF, 0x0C, 0x44, 0x03, 0x07, 0x04,
+ 0x62, 0x26, 0x11, 0x80, 0x1D, 0x80, 0x01,
+ 0x20, 0x00, 0x00, 0x00, 0x06, 0x05, 0x75,
+ 0x77, 0x81, 0x02, 0x80};
+const __u8 nci_t4t_select_cmd[] = {0x00, 0x00, 0x0C, 0x00, 0xA4, 0x04, 0x00,
+ 0x07, 0xD2, 0x76, 0x00, 0x00, 0x85, 0x01, 0x01};
+const __u8 nci_t4t_select_cmd2[] = {0x00, 0x00, 0x07, 0x00, 0xA4, 0x00, 0x0C, 0x02,
+ 0xE1, 0x03};
+const __u8 nci_t4t_select_cmd3[] = {0x00, 0x00, 0x07, 0x00, 0xA4, 0x00, 0x0C, 0x02,
+ 0xE1, 0x04};
+const __u8 nci_t4t_read_cmd[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x00, 0x0F};
+const __u8 nci_t4t_read_rsp[] = {0x00, 0x00, 0x11, 0x00, 0x0F, 0x20, 0x00, 0x3B,
+ 0x00, 0x34, 0x04, 0x06, 0xE1, 0x04, 0x08, 0x00,
+ 0x00, 0x00, 0x90, 0x00};
+const __u8 nci_t4t_read_cmd2[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x00, 0x02};
+const __u8 nci_t4t_read_rsp2[] = {0x00, 0x00, 0x04, 0x00, 0x0F, 0x90, 0x00};
+const __u8 nci_t4t_read_cmd3[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x02, 0x0F};
+const __u8 nci_t4t_read_rsp3[] = {0x00, 0x00, 0x11, 0xD1, 0x01, 0x0B, 0x54, 0x02,
+ 0x65, 0x6E, 0x4E, 0x46, 0x43, 0x20, 0x54, 0x45,
+ 0x53, 0x54, 0x90, 0x00};
+const __u8 nci_t4t_rsp_ok[] = {0x00, 0x00, 0x02, 0x90, 0x00};
struct msgtemplate {
struct nlmsghdr n;
@@ -87,7 +110,7 @@ error:
static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
__u8 genl_cmd, int nla_num, __u16 nla_type[],
- void *nla_data[], int nla_len[])
+ void *nla_data[], int nla_len[], __u16 flags)
{
struct sockaddr_nl nladdr;
struct msgtemplate msg;
@@ -98,7 +121,7 @@ static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
msg.n.nlmsg_type = nlmsg_type;
- msg.n.nlmsg_flags = NLM_F_REQUEST;
+ msg.n.nlmsg_flags = flags;
msg.n.nlmsg_seq = 0;
msg.n.nlmsg_pid = nlmsg_pid;
msg.g.cmd = genl_cmd;
@@ -110,11 +133,11 @@ static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
na->nla_type = nla_type[cnt];
na->nla_len = nla_len[cnt] + NLA_HDRLEN;
- if (nla_len > 0)
+ if (nla_len[cnt] > 0)
memcpy(NLA_DATA(na), nla_data[cnt], nla_len[cnt]);
- msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
- prv_len = na->nla_len;
+ prv_len = NLA_ALIGN(nla_len[cnt]) + NLA_HDRLEN;
+ msg.n.nlmsg_len += prv_len;
}
buf = (char *)&msg;
@@ -146,11 +169,11 @@ static int send_get_nfc_family(int sd, __u32 pid)
nla_get_family_data = family_name;
return send_cmd_mt_nla(sd, GENL_ID_CTRL, pid, CTRL_CMD_GETFAMILY,
- 1, &nla_get_family_type,
- &nla_get_family_data, &nla_get_family_len);
+ 1, &nla_get_family_type, &nla_get_family_data,
+ &nla_get_family_len, NLM_F_REQUEST);
}
-static int get_family_id(int sd, __u32 pid)
+static int get_family_id(int sd, __u32 pid, __u32 *event_group)
{
struct {
struct nlmsghdr n;
@@ -158,8 +181,9 @@ static int get_family_id(int sd, __u32 pid)
char buf[512];
} ans;
struct nlattr *na;
- int rep_len;
+ int resp_len;
__u16 id;
+ int len;
int rc;
rc = send_get_nfc_family(sd, pid);
@@ -167,17 +191,49 @@ static int get_family_id(int sd, __u32 pid)
if (rc < 0)
return 0;
- rep_len = recv(sd, &ans, sizeof(ans), 0);
+ resp_len = recv(sd, &ans, sizeof(ans), 0);
- if (ans.n.nlmsg_type == NLMSG_ERROR || rep_len < 0 ||
- !NLMSG_OK(&ans.n, rep_len))
+ if (ans.n.nlmsg_type == NLMSG_ERROR || resp_len < 0 ||
+ !NLMSG_OK(&ans.n, resp_len))
return 0;
+ len = 0;
+ resp_len = GENLMSG_PAYLOAD(&ans.n);
na = (struct nlattr *)GENLMSG_DATA(&ans);
- na = (struct nlattr *)((char *)na + NLA_ALIGN(na->nla_len));
- if (na->nla_type == CTRL_ATTR_FAMILY_ID)
- id = *(__u16 *)NLA_DATA(na);
+ while (len < resp_len) {
+ len += NLA_ALIGN(na->nla_len);
+ if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
+ id = *(__u16 *)NLA_DATA(na);
+ } else if (na->nla_type == CTRL_ATTR_MCAST_GROUPS) {
+ struct nlattr *nested_na;
+ struct nlattr *group_na;
+ int group_attr_len;
+ int group_attr;
+
+ nested_na = (struct nlattr *)((char *)na + NLA_HDRLEN);
+ group_na = (struct nlattr *)((char *)nested_na + NLA_HDRLEN);
+ group_attr_len = 0;
+
+ for (group_attr = CTRL_ATTR_MCAST_GRP_UNSPEC;
+ group_attr < CTRL_ATTR_MCAST_GRP_MAX; group_attr++) {
+ if (group_na->nla_type == CTRL_ATTR_MCAST_GRP_ID) {
+ *event_group = *(__u32 *)((char *)group_na +
+ NLA_HDRLEN);
+ break;
+ }
+
+ group_attr_len += NLA_ALIGN(group_na->nla_len) +
+ NLA_HDRLEN;
+ if (group_attr_len >= nested_na->nla_len)
+ break;
+
+ group_na = (struct nlattr *)((char *)group_na +
+ NLA_ALIGN(group_na->nla_len));
+ }
+ }
+ na = (struct nlattr *)(GENLMSG_DATA(&ans) + len);
+ }
return id;
}
@@ -189,12 +245,12 @@ static int send_cmd_with_idx(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
int nla_len = 4;
return send_cmd_mt_nla(sd, nlmsg_type, nlmsg_pid, genl_cmd, 1,
- &nla_type, &nla_data, &nla_len);
+ &nla_type, &nla_data, &nla_len, NLM_F_REQUEST);
}
static int get_nci_devid(int sd, __u16 fid, __u32 pid, int dev_id, struct msgtemplate *msg)
{
- int rc, rep_len;
+ int rc, resp_len;
rc = send_cmd_with_idx(sd, fid, pid, NFC_CMD_GET_DEVICE, dev_id);
if (rc < 0) {
@@ -202,14 +258,14 @@ static int get_nci_devid(int sd, __u16 fid, __u32 pid, int dev_id, struct msgtem
goto error;
}
- rep_len = recv(sd, msg, sizeof(*msg), 0);
- if (rep_len < 0) {
+ resp_len = recv(sd, msg, sizeof(*msg), 0);
+ if (resp_len < 0) {
rc = -2;
goto error;
}
if (msg->n.nlmsg_type == NLMSG_ERROR ||
- !NLMSG_OK(&msg->n, rep_len)) {
+ !NLMSG_OK(&msg->n, resp_len)) {
rc = -3;
goto error;
}
@@ -222,21 +278,21 @@ error:
static __u8 get_dev_enable_state(struct msgtemplate *msg)
{
struct nlattr *na;
- int rep_len;
+ int resp_len;
int len;
- rep_len = GENLMSG_PAYLOAD(&msg->n);
+ resp_len = GENLMSG_PAYLOAD(&msg->n);
na = (struct nlattr *)GENLMSG_DATA(msg);
len = 0;
- while (len < rep_len) {
+ while (len < resp_len) {
len += NLA_ALIGN(na->nla_len);
if (na->nla_type == NFC_ATTR_DEVICE_POWERED)
return *(char *)NLA_DATA(na);
na = (struct nlattr *)(GENLMSG_DATA(msg) + len);
}
- return rep_len;
+ return resp_len;
}
FIXTURE(NCI) {
@@ -270,8 +326,7 @@ static void *virtual_dev_open(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_reset_cmd))
@@ -280,8 +335,7 @@ static void *virtual_dev_open(void *data)
goto error;
write(dev_fd, nci_reset_rsp, sizeof(nci_reset_rsp));
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_init_cmd))
@@ -290,8 +344,7 @@ static void *virtual_dev_open(void *data)
goto error;
write(dev_fd, nci_init_rsp, sizeof(nci_init_rsp));
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_rf_disc_map_cmd))
@@ -313,8 +366,7 @@ static void *virtual_dev_open_v2(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_reset_cmd))
@@ -324,8 +376,7 @@ static void *virtual_dev_open_v2(void *data)
write(dev_fd, nci_reset_rsp_v2, sizeof(nci_reset_rsp_v2));
write(dev_fd, nci_reset_ntf, sizeof(nci_reset_ntf));
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_init_cmd_v2))
@@ -334,8 +385,7 @@ static void *virtual_dev_open_v2(void *data)
goto error;
write(dev_fd, nci_init_rsp_v2, sizeof(nci_init_rsp_v2));
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_rf_disc_map_cmd))
@@ -353,6 +403,7 @@ FIXTURE_SETUP(NCI)
{
struct msgtemplate msg;
pthread_t thread_t;
+ __u32 event_group;
int status;
int rc;
@@ -364,12 +415,16 @@ FIXTURE_SETUP(NCI)
ASSERT_NE(self->sd, -1);
self->pid = getpid();
- self->fid = get_family_id(self->sd, self->pid);
+ self->fid = get_family_id(self->sd, self->pid, &event_group);
ASSERT_NE(self->fid, -1);
self->virtual_nci_fd = open("/dev/virtual_nci", O_RDWR);
ASSERT_GT(self->virtual_nci_fd, -1);
+ rc = setsockopt(self->sd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &event_group,
+ sizeof(event_group));
+ ASSERT_NE(rc, -1);
+
rc = ioctl(self->virtual_nci_fd, IOCTL_GET_NCIDEV_IDX, &self->dev_idex);
ASSERT_EQ(rc, 0);
@@ -402,8 +457,7 @@ static void *virtual_deinit(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_reset_cmd))
@@ -425,8 +479,7 @@ static void *virtual_deinit_v2(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_reset_cmd))
@@ -489,16 +542,14 @@ static void *virtual_poll_start(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_rf_discovery_cmd))
goto error;
if (memcmp(nci_rf_discovery_cmd, buf, len))
goto error;
- write(dev_fd, nci_rf_disc_rsp, sizeof(nci_rf_disc_rsp))
- ;
+ write(dev_fd, nci_rf_disc_rsp, sizeof(nci_rf_disc_rsp));
return (void *)0;
error:
@@ -513,8 +564,7 @@ static void *virtual_poll_stop(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_rf_deact_cmd))
@@ -528,38 +578,282 @@ error:
return (void *)-1;
}
-TEST_F(NCI, start_poll)
+int start_polling(int dev_idx, int proto, int virtual_fd, int sd, int fid, int pid)
{
__u16 nla_start_poll_type[2] = {NFC_ATTR_DEVICE_INDEX,
NFC_ATTR_PROTOCOLS};
- void *nla_start_poll_data[2] = {&self->dev_idex, &self->proto};
+ void *nla_start_poll_data[2] = {&dev_idx, &proto};
int nla_start_poll_len[2] = {4, 4};
pthread_t thread_t;
int status;
int rc;
rc = pthread_create(&thread_t, NULL, virtual_poll_start,
- (void *)&self->virtual_nci_fd);
- ASSERT_GT(rc, -1);
+ (void *)&virtual_fd);
+ if (rc < 0)
+ return rc;
- rc = send_cmd_mt_nla(self->sd, self->fid, self->pid,
- NFC_CMD_START_POLL, 2, nla_start_poll_type,
- nla_start_poll_data, nla_start_poll_len);
- EXPECT_EQ(rc, 0);
+ rc = send_cmd_mt_nla(sd, fid, pid, NFC_CMD_START_POLL, 2, nla_start_poll_type,
+ nla_start_poll_data, nla_start_poll_len, NLM_F_REQUEST);
+ if (rc != 0)
+ return rc;
pthread_join(thread_t, (void **)&status);
- ASSERT_EQ(status, 0);
+ return status;
+}
+
+int stop_polling(int dev_idx, int virtual_fd, int sd, int fid, int pid)
+{
+ pthread_t thread_t;
+ int status;
+ int rc;
rc = pthread_create(&thread_t, NULL, virtual_poll_stop,
- (void *)&self->virtual_nci_fd);
- ASSERT_GT(rc, -1);
+ (void *)&virtual_fd);
+ if (rc < 0)
+ return rc;
- rc = send_cmd_with_idx(self->sd, self->fid, self->pid,
- NFC_CMD_STOP_POLL, self->dev_idex);
- EXPECT_EQ(rc, 0);
+ rc = send_cmd_with_idx(sd, fid, pid,
+ NFC_CMD_STOP_POLL, dev_idx);
+ if (rc != 0)
+ return rc;
pthread_join(thread_t, (void **)&status);
+ return status;
+}
+
+TEST_F(NCI, start_poll)
+{
+ int status;
+
+ status = start_polling(self->dev_idex, self->proto, self->virtual_nci_fd,
+ self->sd, self->fid, self->pid);
+ EXPECT_EQ(status, 0);
+
+ status = stop_polling(self->dev_idex, self->virtual_nci_fd, self->sd,
+ self->fid, self->pid);
+ EXPECT_EQ(status, 0);
+}
+
+int get_taginfo(int dev_idx, int sd, int fid, int pid)
+{
+ struct {
+ struct nlmsghdr n;
+ struct genlmsghdr g;
+ char buf[512];
+ } ans;
+
+ struct nlattr *na;
+ __u32 protocol;
+ int targetidx;
+ __u8 sel_res;
+ int resp_len;
+ int len;
+
+ __u16 tagid_type;
+ void *tagid_type_data;
+ int tagid_len;
+
+ tagid_type = NFC_ATTR_DEVICE_INDEX;
+ tagid_type_data = &dev_idx;
+ tagid_len = 4;
+
+ send_cmd_mt_nla(sd, fid, pid, NFC_CMD_GET_TARGET, 1, &tagid_type,
+ &tagid_type_data, &tagid_len, NLM_F_REQUEST | NLM_F_DUMP);
+ resp_len = recv(sd, &ans, sizeof(ans), 0);
+ if (ans.n.nlmsg_type == NLMSG_ERROR || resp_len < 0 ||
+ !NLMSG_OK(&ans.n, resp_len))
+ return -1;
+
+ resp_len = GENLMSG_PAYLOAD(&ans.n);
+ na = (struct nlattr *)GENLMSG_DATA(&ans);
+
+ len = 0;
+ targetidx = -1;
+ protocol = -1;
+ sel_res = -1;
+
+ while (len < resp_len) {
+ len += NLA_ALIGN(na->nla_len);
+
+ if (na->nla_type == NFC_ATTR_TARGET_INDEX)
+ targetidx = *(int *)((char *)na + NLA_HDRLEN);
+ else if (na->nla_type == NFC_ATTR_TARGET_SEL_RES)
+ sel_res = *(__u8 *)((char *)na + NLA_HDRLEN);
+ else if (na->nla_type == NFC_ATTR_PROTOCOLS)
+ protocol = *(__u32 *)((char *)na + NLA_HDRLEN);
+
+ na = (struct nlattr *)(GENLMSG_DATA(&ans) + len);
+ }
+
+ if (targetidx == -1 || sel_res != 0x20 || protocol != NFC_PROTO_ISO14443_MASK)
+ return -1;
+
+ return targetidx;
+}
+
+int connect_socket(int dev_idx, int target_idx)
+{
+ struct sockaddr_nfc addr;
+ int sock;
+ int err = 0;
+
+ sock = socket(AF_NFC, SOCK_SEQPACKET, NFC_SOCKPROTO_RAW);
+ if (sock == -1)
+ return -1;
+
+ addr.sa_family = AF_NFC;
+ addr.dev_idx = dev_idx;
+ addr.target_idx = target_idx;
+ addr.nfc_protocol = NFC_PROTO_ISO14443;
+
+ err = connect(sock, (struct sockaddr *)&addr, sizeof(addr));
+ if (err) {
+ close(sock);
+ return -1;
+ }
+
+ return sock;
+}
+
+int connect_tag(int dev_idx, int virtual_fd, int sd, int fid, int pid)
+{
+ struct genlmsghdr *genlhdr;
+ struct nlattr *na;
+ char evt_data[255];
+ int target_idx;
+ int resp_len;
+ int evt_dev;
+
+ write(virtual_fd, nci_rf_activate_ntf, sizeof(nci_rf_activate_ntf));
+ resp_len = recv(sd, evt_data, sizeof(evt_data), 0);
+ if (resp_len < 0)
+ return -1;
+
+ genlhdr = (struct genlmsghdr *)((struct nlmsghdr *)evt_data + 1);
+ na = (struct nlattr *)(genlhdr + 1);
+ evt_dev = *(int *)((char *)na + NLA_HDRLEN);
+ if (dev_idx != evt_dev)
+ return -1;
+
+ target_idx = get_taginfo(dev_idx, sd, fid, pid);
+ if (target_idx == -1)
+ return -1;
+ return connect_socket(dev_idx, target_idx);
+}
+
+int read_write_nci_cmd(int nfc_sock, int virtual_fd, const __u8 *cmd, __u32 cmd_len,
+ const __u8 *rsp, __u32 rsp_len)
+{
+ char buf[256];
+ unsigned int len;
+
+ send(nfc_sock, &cmd[3], cmd_len - 3, 0);
+ len = read(virtual_fd, buf, cmd_len);
+ if (len < 0 || memcmp(buf, cmd, cmd_len))
+ return -1;
+
+ write(virtual_fd, rsp, rsp_len);
+ len = recv(nfc_sock, buf, rsp_len - 2, 0);
+ if (len < 0 || memcmp(&buf[1], &rsp[3], rsp_len - 3))
+ return -1;
+
+ return 0;
+}
+
+int read_tag(int nfc_sock, int virtual_fd)
+{
+ if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_select_cmd,
+ sizeof(nci_t4t_select_cmd), nci_t4t_rsp_ok,
+ sizeof(nci_t4t_rsp_ok)))
+ return -1;
+
+ if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_select_cmd2,
+ sizeof(nci_t4t_select_cmd2), nci_t4t_rsp_ok,
+ sizeof(nci_t4t_rsp_ok)))
+ return -1;
+
+ if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_read_cmd,
+ sizeof(nci_t4t_read_cmd), nci_t4t_read_rsp,
+ sizeof(nci_t4t_read_rsp)))
+ return -1;
+
+ if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_select_cmd3,
+ sizeof(nci_t4t_select_cmd3), nci_t4t_rsp_ok,
+ sizeof(nci_t4t_rsp_ok)))
+ return -1;
+
+ if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_read_cmd2,
+ sizeof(nci_t4t_read_cmd2), nci_t4t_read_rsp2,
+ sizeof(nci_t4t_read_rsp2)))
+ return -1;
+
+ return read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_read_cmd3,
+ sizeof(nci_t4t_read_cmd3), nci_t4t_read_rsp3,
+ sizeof(nci_t4t_read_rsp3));
+}
+
+static void *virtual_deactivate_proc(void *data)
+{
+ int virtual_fd;
+ char buf[256];
+ int deactcmd_len;
+ int len;
+
+ virtual_fd = *(int *)data;
+ deactcmd_len = sizeof(nci_rf_deact_cmd);
+ len = read(virtual_fd, buf, deactcmd_len);
+ if (len != deactcmd_len || memcmp(buf, nci_rf_deact_cmd, deactcmd_len))
+ return (void *)-1;
+
+ write(virtual_fd, nci_rf_deact_rsp, sizeof(nci_rf_deact_rsp));
+ write(virtual_fd, nci_rf_deact_ntf, sizeof(nci_rf_deact_ntf));
+
+ return (void *)0;
+}
+
+int disconnect_tag(int nfc_sock, int virtual_fd)
+{
+ pthread_t thread_t;
+ char buf[256];
+ int status;
+ int len;
+
+ send(nfc_sock, &nci_t4t_select_cmd3[3], sizeof(nci_t4t_select_cmd3) - 3, 0);
+ len = read(virtual_fd, buf, sizeof(nci_t4t_select_cmd3));
+ if (len < 0 || memcmp(buf, nci_t4t_select_cmd3, sizeof(nci_t4t_select_cmd3)))
+ return -1;
+
+ len = recv(nfc_sock, buf, sizeof(nci_t4t_rsp_ok), 0);
+ if (len != -1)
+ return -1;
+
+ status = pthread_create(&thread_t, NULL, virtual_deactivate_proc,
+ (void *)&virtual_fd);
+
+ close(nfc_sock);
+ pthread_join(thread_t, (void **)&status);
+ return status;
+}
+
+TEST_F(NCI, t4t_tag_read)
+{
+ int nfc_sock;
+ int status;
+
+ status = start_polling(self->dev_idex, self->proto, self->virtual_nci_fd,
+ self->sd, self->fid, self->pid);
+ EXPECT_EQ(status, 0);
+
+ nfc_sock = connect_tag(self->dev_idex, self->virtual_nci_fd, self->sd,
+ self->fid, self->pid);
+ ASSERT_GT(nfc_sock, -1);
+
+ status = read_tag(nfc_sock, self->virtual_nci_fd);
ASSERT_EQ(status, 0);
+
+ status = disconnect_tag(nfc_sock, self->virtual_nci_fd);
+ EXPECT_EQ(status, 0);
}
TEST_F(NCI, deinit)
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 79c9eb0034d5..378c0aac5a1a 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -25,6 +25,8 @@ TEST_PROGS += bareudp.sh
TEST_PROGS += unicast_extensions.sh
TEST_PROGS += udpgro_fwd.sh
TEST_PROGS += veth.sh
+TEST_PROGS += ioam6.sh
+TEST_PROGS += gro.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -36,8 +38,11 @@ TEST_GEN_FILES += fin_ack_lat
TEST_GEN_FILES += reuseaddr_ports_exhausted
TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
TEST_GEN_FILES += ipsec
+TEST_GEN_FILES += ioam6_parser
+TEST_GEN_FILES += gro
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_GEN_FILES += toeplitz
TEST_FILES := settings
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
new file mode 100644
index 000000000000..cfc7f4f97fd1
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -0,0 +1,5 @@
+##TEST_GEN_FILES := test_unix_oob
+TEST_PROGS := test_unix_oob
+include ../../lib.mk
+
+all: $(TEST_PROGS)
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
new file mode 100644
index 000000000000..0f3e3763f4f8
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -0,0 +1,437 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <netinet/tcp.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/poll.h>
+
+static int pipefd[2];
+static int signal_recvd;
+static pid_t producer_id;
+static char sock_name[32];
+
+static void sig_hand(int sn, siginfo_t *si, void *p)
+{
+ signal_recvd = sn;
+}
+
+static int set_sig_handler(int signal)
+{
+ struct sigaction sa;
+
+ sa.sa_sigaction = sig_hand;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO | SA_RESTART;
+
+ return sigaction(signal, &sa, NULL);
+}
+
+static void set_filemode(int fd, int set)
+{
+ int flags = fcntl(fd, F_GETFL, 0);
+
+ if (set)
+ flags &= ~O_NONBLOCK;
+ else
+ flags |= O_NONBLOCK;
+ fcntl(fd, F_SETFL, flags);
+}
+
+static void signal_producer(int fd)
+{
+ char cmd;
+
+ cmd = 'S';
+ write(fd, &cmd, sizeof(cmd));
+}
+
+static void wait_for_signal(int fd)
+{
+ char buf[5];
+
+ read(fd, buf, 5);
+}
+
+static void die(int status)
+{
+ fflush(NULL);
+ unlink(sock_name);
+ kill(producer_id, SIGTERM);
+ exit(status);
+}
+
+int is_sioctatmark(int fd)
+{
+ int ans = -1;
+
+ if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) {
+#ifdef DEBUG
+ perror("SIOCATMARK Failed");
+#endif
+ }
+ return ans;
+}
+
+void read_oob(int fd, char *c)
+{
+
+ *c = ' ';
+ if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
+#ifdef DEBUG
+ perror("Reading MSG_OOB Failed");
+#endif
+ }
+}
+
+int read_data(int pfd, char *buf, int size)
+{
+ int len = 0;
+
+ memset(buf, size, '0');
+ len = read(pfd, buf, size);
+#ifdef DEBUG
+ if (len < 0)
+ perror("read failed");
+#endif
+ return len;
+}
+
+static void wait_for_data(int pfd, int event)
+{
+ struct pollfd pfds[1];
+
+ pfds[0].fd = pfd;
+ pfds[0].events = event;
+ poll(pfds, 1, -1);
+}
+
+void producer(struct sockaddr_un *consumer_addr)
+{
+ int cfd;
+ char buf[64];
+ int i;
+
+ memset(buf, 'x', sizeof(buf));
+ cfd = socket(AF_UNIX, SOCK_STREAM, 0);
+
+ wait_for_signal(pipefd[0]);
+ if (connect(cfd, (struct sockaddr *)consumer_addr,
+ sizeof(struct sockaddr)) != 0) {
+ perror("Connect failed");
+ kill(0, SIGTERM);
+ exit(1);
+ }
+
+ for (i = 0; i < 2; i++) {
+ /* Test 1: Test for SIGURG and OOB */
+ wait_for_signal(pipefd[0]);
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '@';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ wait_for_signal(pipefd[0]);
+
+ /* Test 2: Test for OOB being overwitten */
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '%';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '#';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ wait_for_signal(pipefd[0]);
+
+ /* Test 3: Test for SIOCATMARK */
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '@';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '%';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ memset(buf, 'x', sizeof(buf));
+ send(cfd, buf, sizeof(buf), 0);
+
+ wait_for_signal(pipefd[0]);
+
+ /* Test 4: Test for 1byte OOB msg */
+ memset(buf, 'x', sizeof(buf));
+ buf[0] = '@';
+ send(cfd, buf, 1, MSG_OOB);
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ int lfd, pfd;
+ struct sockaddr_un consumer_addr, paddr;
+ socklen_t len = sizeof(consumer_addr);
+ char buf[1024];
+ int on = 0;
+ char oob;
+ int flags;
+ int atmark;
+ char *tmp_file;
+
+ lfd = socket(AF_UNIX, SOCK_STREAM, 0);
+ memset(&consumer_addr, 0, sizeof(consumer_addr));
+ consumer_addr.sun_family = AF_UNIX;
+ sprintf(sock_name, "unix_oob_%d", getpid());
+ unlink(sock_name);
+ strcpy(consumer_addr.sun_path, sock_name);
+
+ if ((bind(lfd, (struct sockaddr *)&consumer_addr,
+ sizeof(consumer_addr))) != 0) {
+ perror("socket bind failed");
+ exit(1);
+ }
+
+ pipe(pipefd);
+
+ listen(lfd, 1);
+
+ producer_id = fork();
+ if (producer_id == 0) {
+ producer(&consumer_addr);
+ exit(0);
+ }
+
+ set_sig_handler(SIGURG);
+ signal_producer(pipefd[1]);
+
+ pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
+ fcntl(pfd, F_SETOWN, getpid());
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 1:
+ * veriyf that SIGURG is
+ * delivered and 63 bytes are
+ * read and oob is '@'
+ */
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ read_oob(pfd, &oob);
+ len = read_data(pfd, buf, 1024);
+ if (!signal_recvd || len != 63 || oob != '@') {
+ fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 2:
+ * Verify that the first OOB is over written by
+ * the 2nd one and the first OOB is returned as
+ * part of the read, and sigurg is received.
+ */
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = 0;
+ while (len < 70)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ read_oob(pfd, &oob);
+ if (!signal_recvd || len != 127 || oob != '#') {
+ fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 3:
+ * verify that 2nd oob over writes
+ * the first one and read breaks at
+ * oob boundary returning 127 bytes
+ * and sigurg is received and atmark
+ * is set.
+ * oob is '%' and second read returns
+ * 64 bytes.
+ */
+ len = 0;
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ while (len < 150)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ read_oob(pfd, &oob);
+
+ if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
+ fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ",
+ "atmark %d\n", signal_recvd, len, oob, atmark);
+ die(1);
+ }
+
+ signal_recvd = 0;
+
+ len = read_data(pfd, buf, 1024);
+ if (len != 64) {
+ fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 4:
+ * verify that a single byte
+ * oob message is delivered.
+ * set non blocking mode and
+ * check proper error is
+ * returned and sigurg is
+ * received and correct
+ * oob is read.
+ */
+
+ set_filemode(pfd, 0);
+
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = read_data(pfd, buf, 1024);
+ if ((len == -1) && (errno == 11))
+ len = 0;
+
+ read_oob(pfd, &oob);
+
+ if (!signal_recvd || len != 0 || oob != '@') {
+ fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ set_filemode(pfd, 1);
+
+ /* Inline Testing */
+
+ on = 1;
+ if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
+ perror("SO_OOBINLINE");
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 1 -- Inline:
+ * Check that SIGURG is
+ * delivered and 63 bytes are
+ * read and oob is '@'
+ */
+
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = read_data(pfd, buf, 1024);
+
+ if (!signal_recvd || len != 63) {
+ fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
+ signal_recvd, len);
+ die(1);
+ }
+
+ len = read_data(pfd, buf, 1024);
+
+ if (len != 1) {
+ fprintf(stderr,
+ "Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 2 -- Inline:
+ * Verify that the first OOB is over written by
+ * the 2nd one and read breaks correctly on
+ * 2nd OOB boundary with the first OOB returned as
+ * part of the read, and sigurg is delivered and
+ * siocatmark returns true.
+ * next read returns one byte, the oob byte
+ * and siocatmark returns false.
+ */
+ len = 0;
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ while (len < 70)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (len != 127 || atmark != 1 || !signal_recvd) {
+ fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
+ len, atmark);
+ die(1);
+ }
+
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (len != 1 || buf[0] != '#' || atmark == 1) {
+ fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
+ len, buf[0], atmark);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 3 -- Inline:
+ * verify that 2nd oob over writes
+ * the first one and read breaks at
+ * oob boundary returning 127 bytes
+ * and sigurg is received and siocatmark
+ * is true after the read.
+ * subsequent read returns 65 bytes
+ * because of oob which should be '%'.
+ */
+ len = 0;
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ while (len < 126)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (!signal_recvd || len != 127 || !atmark) {
+ fprintf(stderr,
+ "Test 3 Inline failed, sigurg %d len %d data %c\n",
+ signal_recvd, len, buf[0]);
+ die(1);
+ }
+
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (len != 65 || buf[0] != '%' || atmark != 0) {
+ fprintf(stderr,
+ "Test 3.1 Inline failed, len %d oob %c atmark %d\n",
+ len, buf[0], atmark);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 4 -- Inline:
+ * verify that a single
+ * byte oob message is delivered
+ * and read returns one byte, the oob
+ * byte and sigurg is received
+ */
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = read_data(pfd, buf, 1024);
+ if (!signal_recvd || len != 1 || buf[0] != '@') {
+ fprintf(stderr,
+ "Test 4 Inline failed, signal %d len %d data %c\n",
+ signal_recvd, len, buf[0]);
+ die(1);
+ }
+ die(0);
+}
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 6f905b53904f..21b646d10b88 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -42,3 +42,4 @@ CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_ACT_MIRRED=m
CONFIG_BAREUDP=m
+CONFIG_IPV6_IOAM6_LWTUNNEL=y
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index a8ad92850e63..13350cd5c8ac 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -37,6 +37,9 @@
#
# server / client nomenclature relative to ns-A
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
VERBOSE=0
NSA_DEV=eth1
@@ -3879,6 +3882,32 @@ use_case_ping_lla_multi()
log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C"
}
+# Perform IPv{4,6} SNAT on ns-A, and verify TCP connection is successfully
+# established with ns-B.
+use_case_snat_on_vrf()
+{
+ setup "yes"
+
+ local port="12345"
+
+ run_cmd iptables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+ run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+
+ run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} &
+ sleep 1
+ run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port}
+ log_test $? 0 "IPv4 TCP connection over VRF with SNAT"
+
+ run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} &
+ sleep 1
+ run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port}
+ log_test $? 0 "IPv6 TCP connection over VRF with SNAT"
+
+ # Cleanup
+ run_cmd iptables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+ run_cmd ip6tables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+}
+
use_cases()
{
log_section "Use cases"
@@ -3886,6 +3915,8 @@ use_cases()
use_case_br
log_subsection "Ping LLA with multiple interfaces"
use_case_ping_lla_multi
+ log_subsection "SNAT on VRF"
+ use_case_snat_on_vrf
}
################################################################################
@@ -3946,7 +3977,7 @@ fi
which nettest >/dev/null
if [ $? -ne 0 ]; then
echo "'nettest' command not found; skipping tests"
- exit 0
+ exit $ksft_skip
fi
declare -i nfail=0
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index a93e6b690e06..43ea8407a82e 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -3,6 +3,9 @@
# This test is for checking IPv4 and IPv6 FIB rules API
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
ret=0
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
@@ -238,12 +241,12 @@ run_fibrule_tests()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
# start clean
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 13d3d4428a32..2c14a86adaaa 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -1,6 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
##############################################################################
# Defines
@@ -9,11 +12,11 @@ if [[ ! -v DEVLINK_DEV ]]; then
| jq -r '.port | keys[]' | cut -d/ -f-2)
if [ -z "$DEVLINK_DEV" ]; then
echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
- exit 1
+ exit $ksft_skip
fi
if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
echo "SKIP: devlink device's bus is not PCI"
- exit 1
+ exit $ksft_skip
fi
DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
@@ -22,7 +25,7 @@ elif [[ ! -z "$DEVLINK_DEV" ]]; then
devlink dev show $DEVLINK_DEV &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: devlink device \"$DEVLINK_DEV\" not found"
- exit 1
+ exit $ksft_skip
fi
fi
@@ -32,19 +35,19 @@ fi
devlink help 2>&1 | grep resource &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing devlink resource support"
- exit 1
+ exit $ksft_skip
fi
devlink help 2>&1 | grep trap &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing devlink trap support"
- exit 1
+ exit $ksft_skip
fi
devlink dev help 2>&1 | grep info &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing devlink dev info support"
- exit 1
+ exit $ksft_skip
fi
##############################################################################
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 42e28c983d41..e7fc5c35b569 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -4,6 +4,9 @@
##############################################################################
# Defines
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
# Can be overridden by the configuration file.
PING=${PING:=ping}
PING6=${PING6:=ping6}
@@ -38,7 +41,7 @@ check_tc_version()
tc -j &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing JSON support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -51,7 +54,7 @@ check_tc_mpls_support()
matchall action pipe &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing MPLS support"
- return 1
+ return $ksft_skip
fi
tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \
matchall
@@ -69,7 +72,7 @@ check_tc_mpls_lse_stats()
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
- return 1
+ return $ksft_skip
fi
tc -j filter show dev $dev ingress protocol mpls_uc | jq . &> /dev/null
@@ -79,7 +82,7 @@ check_tc_mpls_lse_stats()
if [[ $ret -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
- return 1
+ return $ksft_skip
fi
}
@@ -88,7 +91,7 @@ check_tc_shblock_support()
tc filter help 2>&1 | grep block &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing shared block support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -97,7 +100,7 @@ check_tc_chain_support()
tc help 2>&1|grep chain &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing chain support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -106,7 +109,7 @@ check_tc_action_hw_stats_support()
tc actions help 2>&1 | grep -q hw_stats
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -115,13 +118,13 @@ check_ethtool_lanes_support()
ethtool --help 2>&1| grep lanes &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: ethtool too old; it is missing lanes support"
- exit 1
+ exit $ksft_skip
fi
}
if [[ "$(id -u)" -ne 0 ]]; then
echo "SKIP: need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [[ "$CHECK_TC" = "yes" ]]; then
@@ -134,7 +137,7 @@ require_command()
if [[ ! -x "$(command -v "$cmd")" ]]; then
echo "SKIP: $cmd not installed"
- exit 1
+ exit $ksft_skip
fi
}
@@ -143,7 +146,7 @@ require_command $MZ
if [[ ! -v NUM_NETIFS ]]; then
echo "SKIP: importer does not define \"NUM_NETIFS\""
- exit 1
+ exit $ksft_skip
fi
##############################################################################
@@ -203,7 +206,7 @@ for ((i = 1; i <= NUM_NETIFS; ++i)); do
ip link show dev ${NETIFS[p$i]} &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: could not find all required interfaces"
- exit 1
+ exit $ksft_skip
fi
done
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index 76efb1f8375e..a0d612e04990 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -411,7 +411,7 @@ ping_ipv6()
ip nexthop ls >/dev/null 2>&1
if [ $? -ne 0 ]; then
echo "Nexthop objects not supported; skipping tests"
- exit 0
+ exit $ksft_skip
fi
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index 4898dd4118f1..cb08ffe2356a 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -386,7 +386,7 @@ ping_ipv6()
ip nexthop ls >/dev/null 2>&1
if [ $? -ne 0 ]; then
echo "Nexthop objects not supported; skipping tests"
- exit 0
+ exit $ksft_skip
fi
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
new file mode 100644
index 000000000000..cf37ce86b0fd
--- /dev/null
+++ b/tools/testing/selftests/net/gro.c
@@ -0,0 +1,1095 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This testsuite provides conformance testing for GRO coalescing.
+ *
+ * Test cases:
+ * 1.data
+ * Data packets of the same size and same header setup with correct
+ * sequence numbers coalesce. The one exception being the last data
+ * packet coalesced: it can be smaller than the rest and coalesced
+ * as long as it is in the same flow.
+ * 2.ack
+ * Pure ACK does not coalesce.
+ * 3.flags
+ * Specific test cases: no packets with PSH, SYN, URG, RST set will
+ * be coalesced.
+ * 4.tcp
+ * Packets with incorrect checksum, non-consecutive seqno and
+ * different TCP header options shouldn't coalesce. Nit: given that
+ * some extension headers have paddings, such as timestamp, headers
+ * that are padding differently would not be coalesced.
+ * 5.ip:
+ * Packets with different (ECN, TTL, TOS) header, ip options or
+ * ip fragments (ipv6) shouldn't coalesce.
+ * 6.large:
+ * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce.
+ *
+ * MSS is defined as 4096 - header because if it is too small
+ * (i.e. 1500 MTU - header), it will result in many packets,
+ * increasing the "large" test case's flakiness. This is because
+ * due to time sensitivity in the coalescing window, the receiver
+ * may not coalesce all of the packets.
+ *
+ * Note the timing issue applies to all of the test cases, so some
+ * flakiness is to be expected.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+
+#define DPORT 8000
+#define SPORT 1500
+#define PAYLOAD_LEN 100
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define NUM_PACKETS 4
+#define START_SEQ 100
+#define START_ACK 100
+#define SIP6 "fdaa::2"
+#define DIP6 "fdaa::1"
+#define SIP4 "192.168.1.200"
+#define DIP4 "192.168.1.100"
+#define ETH_P_NONE 0
+#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
+#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+
+static int proto = -1;
+static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
+static char *testname = "data";
+static char *ifname = "eth0";
+static char *smac = "aa:00:00:00:00:02";
+static char *dmac = "aa:00:00:00:00:01";
+static bool verbose;
+static bool tx_socket = true;
+static int tcp_offset = -1;
+static int total_hdr_len = -1;
+static int ethhdr_proto = -1;
+
+static void vlog(const char *fmt, ...)
+{
+ va_list args;
+
+ if (verbose) {
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+ }
+}
+
+static void setup_sock_filter(int fd)
+{
+ const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
+ const int ethproto_off = offsetof(struct ethhdr, h_proto);
+ int optlen = 0;
+ int ipproto_off;
+ int next_off;
+
+ if (proto == PF_INET)
+ next_off = offsetof(struct iphdr, protocol);
+ else
+ next_off = offsetof(struct ipv6hdr, nexthdr);
+ ipproto_off = ETH_HLEN + next_off;
+
+ if (strcmp(testname, "ip") == 0) {
+ if (proto == PF_INET)
+ optlen = sizeof(struct ip_timestamp);
+ else
+ optlen = sizeof(struct ip6_frag);
+ }
+
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+
+ struct sock_fprog bpf = {
+ .len = ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
+ error(1, errno, "error setting filter");
+}
+
+static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
+{
+ uint16_t *words = data;
+ int i;
+
+ for (i = 0; i < len / 2; i++)
+ sum += words[i];
+ if (len & 1)
+ sum += ((char *)data)[len - 1];
+ return sum;
+}
+
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+ sum = checksum_nofold(data, len, sum);
+ while (sum > 0xFFFF)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+ return ~sum;
+}
+
+static uint16_t tcp_checksum(void *buf, int payload_len)
+{
+ struct pseudo_header6 {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ uint16_t protocol;
+ uint16_t payload_len;
+ } ph6;
+ struct pseudo_header4 {
+ struct in_addr saddr;
+ struct in_addr daddr;
+ uint16_t protocol;
+ uint16_t payload_len;
+ } ph4;
+ uint32_t sum = 0;
+
+ if (proto == PF_INET6) {
+ if (inet_pton(AF_INET6, SIP6, &ph6.saddr) != 1)
+ error(1, errno, "inet_pton6 source ip pseudo");
+ if (inet_pton(AF_INET6, DIP6, &ph6.daddr) != 1)
+ error(1, errno, "inet_pton6 dest ip pseudo");
+ ph6.protocol = htons(IPPROTO_TCP);
+ ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+ sum = checksum_nofold(&ph6, sizeof(ph6), 0);
+ } else if (proto == PF_INET) {
+ if (inet_pton(AF_INET, SIP4, &ph4.saddr) != 1)
+ error(1, errno, "inet_pton source ip pseudo");
+ if (inet_pton(AF_INET, DIP4, &ph4.daddr) != 1)
+ error(1, errno, "inet_pton dest ip pseudo");
+ ph4.protocol = htons(IPPROTO_TCP);
+ ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+ sum = checksum_nofold(&ph4, sizeof(ph4), 0);
+ }
+
+ return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
+}
+
+static void read_MAC(uint8_t *mac_addr, char *mac)
+{
+ if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+ &mac_addr[0], &mac_addr[1], &mac_addr[2],
+ &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6)
+ error(1, 0, "sscanf");
+}
+
+static void fill_datalinklayer(void *buf)
+{
+ struct ethhdr *eth = buf;
+
+ memcpy(eth->h_dest, dst_mac, ETH_ALEN);
+ memcpy(eth->h_source, src_mac, ETH_ALEN);
+ eth->h_proto = ethhdr_proto;
+}
+
+static void fill_networklayer(void *buf, int payload_len)
+{
+ struct ipv6hdr *ip6h = buf;
+ struct iphdr *iph = buf;
+
+ if (proto == PF_INET6) {
+ memset(ip6h, 0, sizeof(*ip6h));
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
+ ip6h->nexthdr = IPPROTO_TCP;
+ ip6h->hop_limit = 8;
+ if (inet_pton(AF_INET6, SIP6, &ip6h->saddr) != 1)
+ error(1, errno, "inet_pton source ip6");
+ if (inet_pton(AF_INET6, DIP6, &ip6h->daddr) != 1)
+ error(1, errno, "inet_pton dest ip6");
+ } else if (proto == PF_INET) {
+ memset(iph, 0, sizeof(*iph));
+
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->ttl = 8;
+ iph->protocol = IPPROTO_TCP;
+ iph->tot_len = htons(sizeof(struct tcphdr) +
+ payload_len + sizeof(struct iphdr));
+ iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
+ if (inet_pton(AF_INET, SIP4, &iph->saddr) != 1)
+ error(1, errno, "inet_pton source ip");
+ if (inet_pton(AF_INET, DIP4, &iph->daddr) != 1)
+ error(1, errno, "inet_pton dest ip");
+ iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
+ }
+}
+
+static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
+ int payload_len, int fin)
+{
+ struct tcphdr *tcph = buf;
+
+ memset(tcph, 0, sizeof(*tcph));
+
+ tcph->source = htons(SPORT);
+ tcph->dest = htons(DPORT);
+ tcph->seq = ntohl(START_SEQ + seq_offset);
+ tcph->ack_seq = ntohl(START_ACK + ack_offset);
+ tcph->ack = 1;
+ tcph->fin = fin;
+ tcph->doff = 5;
+ tcph->window = htons(TCP_MAXWIN);
+ tcph->urg_ptr = 0;
+ tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
+{
+ int ret = -1;
+
+ ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
+ if (ret == -1)
+ error(1, errno, "sendto failure");
+ if (ret != len)
+ error(1, errno, "sendto wrong length");
+}
+
+static void create_packet(void *buf, int seq_offset, int ack_offset,
+ int payload_len, int fin)
+{
+ memset(buf, 0, total_hdr_len);
+ memset(buf + total_hdr_len, 'a', payload_len);
+ fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
+ payload_len, fin);
+ fill_networklayer(buf + ETH_HLEN, payload_len);
+ fill_datalinklayer(buf);
+}
+
+/* send one extra flag, not first and not last pkt */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+ int rst, int urg)
+{
+ static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ int payload_len, pkt_size, flag, i;
+ struct tcphdr *tcph;
+
+ payload_len = PAYLOAD_LEN * psh;
+ pkt_size = total_hdr_len + payload_len;
+ flag = NUM_PACKETS / 2;
+
+ create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
+
+ tcph = (struct tcphdr *)(flag_buf + tcp_offset);
+ tcph->psh = psh;
+ tcph->syn = syn;
+ tcph->rst = rst;
+ tcph->urg = urg;
+ tcph->check = 0;
+ tcph->check = tcp_checksum(tcph, payload_len);
+
+ for (i = 0; i < NUM_PACKETS + 1; i++) {
+ if (i == flag) {
+ write_packet(fd, flag_buf, pkt_size, daddr);
+ continue;
+ }
+ create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+ }
+}
+
+/* Test for data of same length, smaller than previous
+ * and of different lengths
+ */
+static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
+ int payload_len1, int payload_len2)
+{
+ static char buf[ETH_HLEN + IP_MAXPACKET];
+
+ create_packet(buf, 0, 0, payload_len1, 0);
+ write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
+ create_packet(buf, payload_len1, 0, payload_len2, 0);
+ write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
+}
+
+/* If incoming segments make tracked segment length exceed
+ * legal IP datagram length, do not coalesce
+ */
+static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
+{
+ static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
+ static char last[TOTAL_HDR_LEN + MSS];
+ static char new_seg[TOTAL_HDR_LEN + MSS];
+ int i;
+
+ for (i = 0; i < NUM_LARGE_PKT; i++)
+ create_packet(pkts[i], i * MSS, 0, MSS, 0);
+ create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
+ create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
+
+ for (i = 0; i < NUM_LARGE_PKT; i++)
+ write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
+ write_packet(fd, last, total_hdr_len + remainder, daddr);
+ write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
+}
+
+/* Pure acks and dup acks don't coalesce */
+static void send_ack(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN];
+
+ create_packet(buf, 0, 0, 0, 0);
+ write_packet(fd, buf, total_hdr_len, daddr);
+ write_packet(fd, buf, total_hdr_len, daddr);
+ create_packet(buf, 0, 1, 0, 0);
+ write_packet(fd, buf, total_hdr_len, daddr);
+}
+
+static void recompute_packet(char *buf, char *no_ext, int extlen)
+{
+ struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ memmove(buf, no_ext, total_hdr_len);
+ memmove(buf + total_hdr_len + extlen,
+ no_ext + total_hdr_len, PAYLOAD_LEN);
+
+ tcphdr->doff = tcphdr->doff + (extlen / 4);
+ tcphdr->check = 0;
+ tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
+ if (proto == PF_INET) {
+ iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else {
+ ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+ }
+}
+
+static void tcp_write_options(char *buf, int kind, int ts)
+{
+ struct tcp_option_ts {
+ uint8_t kind;
+ uint8_t len;
+ uint32_t tsval;
+ uint32_t tsecr;
+ } *opt_ts = (void *)buf;
+ struct tcp_option_window {
+ uint8_t kind;
+ uint8_t len;
+ uint8_t shift;
+ } *opt_window = (void *)buf;
+
+ switch (kind) {
+ case TCPOPT_NOP:
+ buf[0] = TCPOPT_NOP;
+ break;
+ case TCPOPT_WINDOW:
+ memset(opt_window, 0, sizeof(struct tcp_option_window));
+ opt_window->kind = TCPOPT_WINDOW;
+ opt_window->len = TCPOLEN_WINDOW;
+ opt_window->shift = 0;
+ break;
+ case TCPOPT_TIMESTAMP:
+ memset(opt_ts, 0, sizeof(struct tcp_option_ts));
+ opt_ts->kind = TCPOPT_TIMESTAMP;
+ opt_ts->len = TCPOLEN_TIMESTAMP;
+ opt_ts->tsval = ts;
+ opt_ts->tsecr = 0;
+ break;
+ default:
+ error(1, 0, "unimplemented TCP option");
+ break;
+ }
+}
+
+/* TCP with options is always a permutation of {TS, NOP, NOP}.
+ * Implement different orders to verify coalescing stops.
+ */
+static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
+{
+ switch (order) {
+ case 0:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
+ TCPOPT_TIMESTAMP, ts);
+ break;
+ case 1:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 1,
+ TCPOPT_TIMESTAMP, ts);
+ tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
+ TCPOPT_NOP, 0);
+ break;
+ case 2:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
+ tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
+ TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
+ TCPOPT_NOP, 0);
+ break;
+ default:
+ error(1, 0, "unknown order");
+ break;
+ }
+ recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
+}
+
+/* Packets with invalid checksum don't coalesce. */
+static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ tcph->check = tcph->check - 1;
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packets with non-consecutive sequence number don't coalesce.*/
+static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ tcph->seq = ntohl(htonl(tcph->seq) + 1);
+ tcph->check = 0;
+ tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packet with different timestamp option or different timestamps
+ * don't coalesce.
+ */
+static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+ int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 0, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 0, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 1);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 2);
+ write_packet(fd, extpkt, pkt_size, daddr);
+}
+
+/* Packet with different tcp options don't coalesce. */
+static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+ static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
+ int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+ int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt1, buf, 0, 0);
+ write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt1, buf, 0, 0);
+ write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
+ tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
+ recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
+ write_packet(fd, extpkt2, extpkt2_size, daddr);
+}
+
+static void add_ipv4_ts_option(void *buf, void *optpkt)
+{
+ struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
+ int optlen = sizeof(struct ip_timestamp);
+ struct iphdr *iph;
+
+ if (optlen % 4)
+ error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
+
+ ts->ipt_code = IPOPT_TS;
+ ts->ipt_len = optlen;
+ ts->ipt_ptr = 5;
+ ts->ipt_flg = IPOPT_TS_TSONLY;
+
+ memcpy(optpkt, buf, tcp_offset);
+ memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+ iph = (struct iphdr *)(optpkt + ETH_HLEN);
+ iph->ihl = 5 + (optlen / 4);
+ iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
+}
+
+/* IPv4 options shouldn't coalesce */
+static void send_ip_options(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
+ int optlen = sizeof(struct ip_timestamp);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+ add_ipv4_ts_option(buf, optpkt);
+ write_packet(fd, optpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+}
+
+/* IPv4 fragments shouldn't coalesce */
+static void send_fragment4(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[IP_MAXPACKET];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ /* Once fragmented, packet would retain the total_len.
+ * Tcp header is prepared as if rest of data is in follow-up frags,
+ * but follow up frags aren't actually sent.
+ */
+ memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
+ fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
+ fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN);
+ fill_datalinklayer(buf);
+
+ iph->frag_off = htons(0x6000); // DF = 1, MF = 1
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv4 packets with different ttl don't coalesce.*/
+static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ iph->ttl = 7;
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different tos don't coalesce.*/
+static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ if (proto == PF_INET) {
+ iph->tos = 1;
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else if (proto == PF_INET6) {
+ ip6h->priority = 0xf;
+ }
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different ECN don't coalesce.*/
+static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ if (proto == PF_INET) {
+ buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else {
+ buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10
+ }
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv6 fragments and packets with extensions don't coalesce.*/
+static void send_fragment6(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
+ sizeof(struct ip6_frag)];
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+ struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
+ int extlen = sizeof(struct ip6_frag);
+ int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
+ int extpkt_len = bufpkt_len + extlen;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, bufpkt_len, daddr);
+ }
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ memset(extpkt, 0, extpkt_len);
+
+ ip6h->nexthdr = IPPROTO_FRAGMENT;
+ ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+ frag->ip6f_nxt = IPPROTO_TCP;
+
+ memcpy(extpkt, buf, tcp_offset);
+ memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+ write_packet(fd, extpkt, extpkt_len, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, bufpkt_len, daddr);
+}
+
+static void bind_packetsocket(int fd)
+{
+ struct sockaddr_ll daddr = {};
+
+ daddr.sll_family = AF_PACKET;
+ daddr.sll_protocol = ethhdr_proto;
+ daddr.sll_ifindex = if_nametoindex(ifname);
+ if (daddr.sll_ifindex == 0)
+ error(1, errno, "if_nametoindex");
+
+ if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
+ error(1, errno, "could not bind socket");
+}
+
+static void set_timeout(int fd)
+{
+ struct timeval timeout;
+
+ timeout.tv_sec = 120;
+ timeout.tv_usec = 0;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
+ sizeof(timeout)) < 0)
+ error(1, errno, "cannot set timeout, setsockopt failed");
+}
+
+static void check_recv_pkts(int fd, int *correct_payload,
+ int correct_num_pkts)
+{
+ static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
+ struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
+ struct tcphdr *tcph;
+ bool bad_packet = false;
+ int tcp_ext_len = 0;
+ int ip_ext_len = 0;
+ int pkt_size = -1;
+ int data_len = 0;
+ int num_pkt = 0;
+ int i;
+
+ vlog("Expected {");
+ for (i = 0; i < correct_num_pkts; i++)
+ vlog("%d ", correct_payload[i]);
+ vlog("}, Total %d packets\nReceived {", correct_num_pkts);
+
+ while (1) {
+ pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
+ if (pkt_size < 0)
+ error(1, errno, "could not receive");
+
+ if (iph->version == 4)
+ ip_ext_len = (iph->ihl - 5) * 4;
+ else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
+ ip_ext_len = sizeof(struct ip6_frag);
+
+ tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
+
+ if (tcph->fin)
+ break;
+
+ tcp_ext_len = (tcph->doff - 5) * 4;
+ data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
+ /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3.
+ * Ipv4/tcp packets without at least 6 bytes of data will be padded.
+ * Packet sockets are protocol agnostic, and will not trim the padding.
+ */
+ if (pkt_size == ETH_ZLEN && iph->version == 4) {
+ data_len = ntohs(iph->tot_len)
+ - sizeof(struct tcphdr) - sizeof(struct iphdr);
+ }
+ vlog("%d ", data_len);
+ if (data_len != correct_payload[num_pkt]) {
+ vlog("[!=%d]", correct_payload[num_pkt]);
+ bad_packet = true;
+ }
+ num_pkt++;
+ }
+ vlog("}, Total %d packets.\n", num_pkt);
+ if (num_pkt != correct_num_pkts)
+ error(1, 0, "incorrect number of packets");
+ if (bad_packet)
+ error(1, 0, "incorrect packet geometry");
+
+ printf("Test succeeded\n\n");
+}
+
+static void gro_sender(void)
+{
+ static char fin_pkt[MAX_HDR_LEN];
+ struct sockaddr_ll daddr = {};
+ int txfd = -1;
+
+ txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (txfd < 0)
+ error(1, errno, "socket creation");
+
+ memset(&daddr, 0, sizeof(daddr));
+ daddr.sll_ifindex = if_nametoindex(ifname);
+ if (daddr.sll_ifindex == 0)
+ error(1, errno, "if_nametoindex");
+ daddr.sll_family = AF_PACKET;
+ memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
+ daddr.sll_halen = ETH_ALEN;
+ create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
+
+ if (strcmp(testname, "data") == 0) {
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ack") == 0) {
+ send_ack(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "flags") == 0) {
+ send_flags(txfd, &daddr, 1, 0, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 1, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 0, 1, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 0, 0, 1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "tcp") == 0) {
+ send_changed_checksum(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_seq(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_ts(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_diff_opt(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip") == 0) {
+ send_changed_ECN(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_tos(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ if (proto == PF_INET) {
+ /* Modified packets may be received out of order.
+ * Sleep function added to enforce test boundaries
+ * so that fin pkts are not received prior to other pkts.
+ */
+ sleep(1);
+ send_changed_ttl(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ send_ip_options(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ send_fragment4(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (proto == PF_INET6) {
+ send_fragment6(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ }
+ } else if (strcmp(testname, "large") == 0) {
+ /* 20 is the difference between min iphdr size
+ * and min ipv6hdr size. Like MAX_HDR_SIZE,
+ * MAX_PAYLOAD is defined with the larger header of the two.
+ */
+ int offset = proto == PF_INET ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+ send_large(txfd, &daddr, remainder);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_large(txfd, &daddr, remainder + 1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else {
+ error(1, 0, "Unknown testcase");
+ }
+
+ if (close(txfd))
+ error(1, errno, "socket close");
+}
+
+static void gro_receiver(void)
+{
+ static int correct_payload[NUM_PACKETS];
+ int rxfd = -1;
+
+ rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
+ if (rxfd < 0)
+ error(1, 0, "socket creation");
+ setup_sock_filter(rxfd);
+ set_timeout(rxfd);
+ bind_packetsocket(rxfd);
+
+ memset(correct_payload, 0, sizeof(correct_payload));
+
+ if (strcmp(testname, "data") == 0) {
+ printf("pure data packet of same size: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("large data packets followed by a smaller one: ");
+ correct_payload[0] = PAYLOAD_LEN * 1.5;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("small data packets followed by a larger one: ");
+ correct_payload[0] = PAYLOAD_LEN / 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ack") == 0) {
+ printf("duplicate ack and pure ack: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "flags") == 0) {
+ correct_payload[0] = PAYLOAD_LEN * 3;
+ correct_payload[1] = PAYLOAD_LEN * 2;
+
+ printf("psh flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = 0;
+ correct_payload[2] = PAYLOAD_LEN * 2;
+ printf("syn flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("rst flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("urg flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "tcp") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ correct_payload[3] = PAYLOAD_LEN;
+
+ printf("changed checksum does not coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("Wrong Seq number doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("Different timestamp doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 4);
+
+ printf("Different options doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ip") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+
+ printf("different ECN doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("different tos doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ if (proto == PF_INET) {
+ printf("different ttl doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("ip options doesn't coalesce: ");
+ correct_payload[2] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("fragmented ip4 doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (proto == PF_INET6) {
+ /* GRO doesn't check for ipv6 hop limit when flushing.
+ * Hence no corresponding test to the ipv4 case.
+ */
+ printf("fragmented ip6 doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ }
+ } else if (strcmp(testname, "large") == 0) {
+ int offset = proto == PF_INET ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+ correct_payload[0] = (MAX_PAYLOAD + offset);
+ correct_payload[1] = remainder;
+ printf("Shouldn't coalesce if exceed IP max pkt size: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* last segment sent individually, doesn't start new segment */
+ correct_payload[0] = correct_payload[0] - remainder;
+ correct_payload[1] = remainder + 1;
+ correct_payload[2] = remainder + 1;
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else {
+ error(1, 0, "Test case error, should never trigger");
+ }
+
+ if (close(rxfd))
+ error(1, 0, "socket close");
+}
+
+static void parse_args(int argc, char **argv)
+{
+ static const struct option opts[] = {
+ { "dmac", required_argument, NULL, 'D' },
+ { "iface", required_argument, NULL, 'i' },
+ { "ipv4", no_argument, NULL, '4' },
+ { "ipv6", no_argument, NULL, '6' },
+ { "rx", no_argument, NULL, 'r' },
+ { "smac", required_argument, NULL, 'S' },
+ { "test", required_argument, NULL, 't' },
+ { "verbose", no_argument, NULL, 'v' },
+ { 0, 0, 0, 0 }
+ };
+ int c;
+
+ while ((c = getopt_long(argc, argv, "46D:i:rS:t:v", opts, NULL)) != -1) {
+ switch (c) {
+ case '4':
+ proto = PF_INET;
+ ethhdr_proto = htons(ETH_P_IP);
+ break;
+ case '6':
+ proto = PF_INET6;
+ ethhdr_proto = htons(ETH_P_IPV6);
+ break;
+ case 'D':
+ dmac = optarg;
+ break;
+ case 'i':
+ ifname = optarg;
+ break;
+ case 'r':
+ tx_socket = false;
+ break;
+ case 'S':
+ smac = optarg;
+ break;
+ case 't':
+ testname = optarg;
+ break;
+ case 'v':
+ verbose = true;
+ break;
+ default:
+ error(1, 0, "%s invalid option %c\n", __func__, c);
+ break;
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_args(argc, argv);
+
+ if (proto == PF_INET) {
+ tcp_offset = ETH_HLEN + sizeof(struct iphdr);
+ total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+ } else if (proto == PF_INET6) {
+ tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
+ total_hdr_len = MAX_HDR_LEN;
+ } else {
+ error(1, 0, "Protocol family is not ipv4 or ipv6");
+ }
+
+ read_MAC(src_mac, smac);
+ read_MAC(dst_mac, dmac);
+
+ if (tx_socket)
+ gro_sender();
+ else
+ gro_receiver();
+ return 0;
+}
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
new file mode 100755
index 000000000000..342ad27f631b
--- /dev/null
+++ b/tools/testing/selftests/net/gro.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
+readonly PROTOS=("ipv4" "ipv6")
+dev=""
+test="all"
+proto="ipv4"
+
+run_test() {
+ local server_pid=0
+ local exit_code=0
+ local protocol=$1
+ local test=$2
+ local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
+ "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
+
+ setup_ns
+ # Each test is run 3 times to deflake, because given the receive timing,
+ # not all packets that should coalesce will be considered in the same flow
+ # on every try.
+ for tries in {1..3}; do
+ # Actual test starts here
+ ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
+ 1>>log.txt &
+ server_pid=$!
+ sleep 0.5 # to allow for socket init
+ ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \
+ 1>>log.txt
+ wait "${server_pid}"
+ exit_code=$?
+ if [[ "${exit_code}" -eq 0 ]]; then
+ break;
+ fi
+ done
+ cleanup_ns
+ echo ${exit_code}
+}
+
+run_all_tests() {
+ local failed_tests=()
+ for proto in "${PROTOS[@]}"; do
+ for test in "${TESTS[@]}"; do
+ echo "running test ${proto} ${test}" >&2
+ exit_code=$(run_test $proto $test)
+ if [[ "${exit_code}" -ne 0 ]]; then
+ failed_tests+=("${proto}_${test}")
+ fi;
+ done;
+ done
+ if [[ ${#failed_tests[@]} -ne 0 ]]; then
+ echo "failed tests: ${failed_tests[*]}. \
+ Please see log.txt for more logs"
+ exit 1
+ else
+ echo "All Tests Succeeded!"
+ fi;
+}
+
+usage() {
+ echo "Usage: $0 \
+ [-i <DEV>] \
+ [-t data|ack|flags|tcp|ip|large] \
+ [-p <ipv4|ipv6>]" 1>&2;
+ exit 1;
+}
+
+while getopts "i:t:p:" opt; do
+ case "${opt}" in
+ i)
+ dev="${OPTARG}"
+ ;;
+ t)
+ test="${OPTARG}"
+ ;;
+ p)
+ proto="${OPTARG}"
+ ;;
+ *)
+ usage
+ ;;
+ esac
+done
+
+if [ -n "$dev" ]; then
+ source setup_loopback.sh
+else
+ source setup_veth.sh
+fi
+
+setup
+trap cleanup EXIT
+if [[ "${test}" == "all" ]]; then
+ run_all_tests
+else
+ run_test "${proto}" "${test}"
+fi;
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
new file mode 100755
index 000000000000..3caf72bb9c6a
--- /dev/null
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -0,0 +1,652 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Author: Justin Iurman <justin.iurman@uliege.be>
+#
+# This script evaluates the IOAM insertion for IPv6 by checking the IOAM data
+# consistency directly inside packets on the receiver side. Tests are divided
+# into three categories: OUTPUT (evaluates the IOAM processing by the sender),
+# INPUT (evaluates the IOAM processing by the receiver) and GLOBAL (evaluates
+# wider use cases that do not fall into the other two categories). Both OUTPUT
+# and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL
+# tests use the entire three-node topology (alpha, beta, gamma). Each test is
+# documented inside its own handler in the code below.
+#
+# An IOAM domain is configured from Alpha to Gamma but not on the reverse path.
+# When either Beta or Gamma is the destination (depending on the test category),
+# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | Alpha netns | | Gamma netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | db01::2/64 | | | | db02::2/64 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +----------------------------------------------------+
+# | . . |
+# | +-------------+ +-------------+ |
+# | | veth0 | | veth1 | |
+# | | db01::1/64 | ................ | db02::1/64 | |
+# | +-------------+ +-------------+ |
+# | |
+# | Beta netns |
+# | |
+# +----------------------------------------------------+
+#
+#
+#
+# =============================================================
+# | Alpha - IOAM configuration |
+# +===========================================================+
+# | Node ID | 1 |
+# +-----------------------------------------------------------+
+# | Node Wide ID | 11111111 |
+# +-----------------------------------------------------------+
+# | Ingress ID | 0xffff (default value) |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID | 0xffffffff (default value) |
+# +-----------------------------------------------------------+
+# | Egress ID | 101 |
+# +-----------------------------------------------------------+
+# | Egress Wide ID | 101101 |
+# +-----------------------------------------------------------+
+# | Namespace Data | 0xdeadbee0 |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf00dc0de |
+# +-----------------------------------------------------------+
+# | Schema ID | 777 |
+# +-----------------------------------------------------------+
+# | Schema Data | something that will be 4n-aligned |
+# +-----------------------------------------------------------+
+#
+#
+# =============================================================
+# | Beta - IOAM configuration |
+# +===========================================================+
+# | Node ID | 2 |
+# +-----------------------------------------------------------+
+# | Node Wide ID | 22222222 |
+# +-----------------------------------------------------------+
+# | Ingress ID | 201 |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID | 201201 |
+# +-----------------------------------------------------------+
+# | Egress ID | 202 |
+# +-----------------------------------------------------------+
+# | Egress Wide ID | 202202 |
+# +-----------------------------------------------------------+
+# | Namespace Data | 0xdeadbee1 |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf11dc0de |
+# +-----------------------------------------------------------+
+# | Schema ID | 666 |
+# +-----------------------------------------------------------+
+# | Schema Data | Hello there -Obi |
+# +-----------------------------------------------------------+
+#
+#
+# =============================================================
+# | Gamma - IOAM configuration |
+# +===========================================================+
+# | Node ID | 3 |
+# +-----------------------------------------------------------+
+# | Node Wide ID | 33333333 |
+# +-----------------------------------------------------------+
+# | Ingress ID | 301 |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID | 301301 |
+# +-----------------------------------------------------------+
+# | Egress ID | 0xffff (default value) |
+# +-----------------------------------------------------------+
+# | Egress Wide ID | 0xffffffff (default value) |
+# +-----------------------------------------------------------+
+# | Namespace Data | 0xdeadbee2 |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf22dc0de |
+# +-----------------------------------------------------------+
+# | Schema ID | 0xffffff (= None) |
+# +-----------------------------------------------------------+
+# | Schema Data | |
+# +-----------------------------------------------------------+
+
+
+################################################################################
+# #
+# WARNING: Be careful if you modify the block below - it MUST be kept #
+# synchronized with configurations inside ioam6_parser.c and always #
+# reflect the same. #
+# #
+################################################################################
+
+ALPHA=(
+ 1 # ID
+ 11111111 # Wide ID
+ 0xffff # Ingress ID
+ 0xffffffff # Ingress Wide ID
+ 101 # Egress ID
+ 101101 # Egress Wide ID
+ 0xdeadbee0 # Namespace Data
+ 0xcafec0caf00dc0de # Namespace Wide Data
+ 777 # Schema ID (0xffffff = None)
+ "something that will be 4n-aligned" # Schema Data
+)
+
+BETA=(
+ 2
+ 22222222
+ 201
+ 201201
+ 202
+ 202202
+ 0xdeadbee1
+ 0xcafec0caf11dc0de
+ 666
+ "Hello there -Obi"
+)
+
+GAMMA=(
+ 3
+ 33333333
+ 301
+ 301301
+ 0xffff
+ 0xffffffff
+ 0xdeadbee2
+ 0xcafec0caf22dc0de
+ 0xffffff
+ ""
+)
+
+TESTS_OUTPUT="
+ out_undef_ns
+ out_no_room
+ out_bits
+ out_full_supp_trace
+"
+
+TESTS_INPUT="
+ in_undef_ns
+ in_no_room
+ in_oflag
+ in_bits
+ in_full_supp_trace
+"
+
+TESTS_GLOBAL="
+ fwd_full_supp_trace
+"
+
+
+################################################################################
+# #
+# LIBRARY #
+# #
+################################################################################
+
+check_kernel_compatibility()
+{
+ ip netns add ioam-tmp-node
+ ip link add name veth0 netns ioam-tmp-node type veth \
+ peer name veth1 netns ioam-tmp-node
+
+ ip -netns ioam-tmp-node link set veth0 up
+ ip -netns ioam-tmp-node link set veth1 up
+
+ ip -netns ioam-tmp-node ioam namespace add 0 &>/dev/null
+ ns_ad=$?
+
+ ip -netns ioam-tmp-node ioam namespace show | grep -q "namespace 0"
+ ns_sh=$?
+
+ if [[ $ns_ad != 0 || $ns_sh != 0 ]]
+ then
+ echo "SKIP: kernel version probably too old, missing ioam support"
+ ip link del veth0 2>/dev/null || true
+ ip netns del ioam-tmp-node || true
+ exit 1
+ fi
+
+ ip -netns ioam-tmp-node route add db02::/64 encap ioam6 trace prealloc \
+ type 0x800000 ns 0 size 4 dev veth0 &>/dev/null
+ tr_ad=$?
+
+ ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6 trace"
+ tr_sh=$?
+
+ if [[ $tr_ad != 0 || $tr_sh != 0 ]]
+ then
+ echo "SKIP: cannot attach an ioam trace to a route, did you compile" \
+ "without CONFIG_IPV6_IOAM6_LWTUNNEL?"
+ ip link del veth0 2>/dev/null || true
+ ip netns del ioam-tmp-node || true
+ exit 1
+ fi
+
+ ip link del veth0 2>/dev/null || true
+ ip netns del ioam-tmp-node || true
+}
+
+cleanup()
+{
+ ip link del ioam-veth-alpha 2>/dev/null || true
+ ip link del ioam-veth-gamma 2>/dev/null || true
+
+ ip netns del ioam-node-alpha || true
+ ip netns del ioam-node-beta || true
+ ip netns del ioam-node-gamma || true
+}
+
+setup()
+{
+ ip netns add ioam-node-alpha
+ ip netns add ioam-node-beta
+ ip netns add ioam-node-gamma
+
+ ip link add name ioam-veth-alpha netns ioam-node-alpha type veth \
+ peer name ioam-veth-betaL netns ioam-node-beta
+ ip link add name ioam-veth-betaR netns ioam-node-beta type veth \
+ peer name ioam-veth-gamma netns ioam-node-gamma
+
+ ip -netns ioam-node-alpha link set ioam-veth-alpha name veth0
+ ip -netns ioam-node-beta link set ioam-veth-betaL name veth0
+ ip -netns ioam-node-beta link set ioam-veth-betaR name veth1
+ ip -netns ioam-node-gamma link set ioam-veth-gamma name veth0
+
+ ip -netns ioam-node-alpha addr add db01::2/64 dev veth0
+ ip -netns ioam-node-alpha link set veth0 up
+ ip -netns ioam-node-alpha link set lo up
+ ip -netns ioam-node-alpha route add db02::/64 via db01::1 dev veth0
+ ip -netns ioam-node-alpha route del db01::/64
+ ip -netns ioam-node-alpha route add db01::/64 dev veth0
+
+ ip -netns ioam-node-beta addr add db01::1/64 dev veth0
+ ip -netns ioam-node-beta addr add db02::1/64 dev veth1
+ ip -netns ioam-node-beta link set veth0 up
+ ip -netns ioam-node-beta link set veth1 up
+ ip -netns ioam-node-beta link set lo up
+
+ ip -netns ioam-node-gamma addr add db02::2/64 dev veth0
+ ip -netns ioam-node-gamma link set veth0 up
+ ip -netns ioam-node-gamma link set lo up
+ ip -netns ioam-node-gamma route add db01::/64 via db02::1 dev veth0
+
+ # - IOAM config -
+ ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]}
+ ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]}
+ ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]}
+ ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]}
+ ip -netns ioam-node-alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]}
+ ip -netns ioam-node-alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}"
+ ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]}
+
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.all.forwarding=1
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]}
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]}
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]}
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]}
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]}
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]}
+ ip -netns ioam-node-beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]}
+ ip -netns ioam-node-beta ioam schema add ${BETA[8]} "${BETA[9]}"
+ ip -netns ioam-node-beta ioam namespace set 123 schema ${BETA[8]}
+
+ ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]}
+ ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]}
+ ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]}
+ ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]}
+ ip -netns ioam-node-gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]}
+
+ sleep 1
+
+ ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null
+ if [ $? != 0 ]
+ then
+ echo "Setup FAILED"
+ cleanup &>/dev/null
+ exit 0
+ fi
+}
+
+log_test_passed()
+{
+ local desc=$1
+ printf "TEST: %-60s [ OK ]\n" "${desc}"
+}
+
+log_test_failed()
+{
+ local desc=$1
+ printf "TEST: %-60s [FAIL]\n" "${desc}"
+}
+
+run_test()
+{
+ local name=$1
+ local desc=$2
+ local node_src=$3
+ local node_dst=$4
+ local ip6_src=$5
+ local ip6_dst=$6
+ local if_dst=$7
+ local trace_type=$8
+ local ioam_ns=$9
+
+ ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \
+ $trace_type $ioam_ns &
+ local spid=$!
+ sleep 0.1
+
+ ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null
+ if [ $? != 0 ]
+ then
+ log_test_failed "${desc}"
+ kill -2 $spid &>/dev/null
+ else
+ wait $spid
+ [ $? = 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
+ fi
+}
+
+run()
+{
+ echo
+ echo "OUTPUT tests"
+ printf "%0.s-" {1..74}
+ echo
+
+ # set OUTPUT settings
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0
+
+ for t in $TESTS_OUTPUT
+ do
+ $t
+ done
+
+ # clean OUTPUT settings
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ ip -netns ioam-node-alpha route change db01::/64 dev veth0
+
+
+ echo
+ echo "INPUT tests"
+ printf "%0.s-" {1..74}
+ echo
+
+ # set INPUT settings
+ ip -netns ioam-node-alpha ioam namespace del 123
+
+ for t in $TESTS_INPUT
+ do
+ $t
+ done
+
+ # clean INPUT settings
+ ip -netns ioam-node-alpha ioam namespace add 123 \
+ data ${ALPHA[6]} wide ${ALPHA[7]}
+ ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]}
+ ip -netns ioam-node-alpha route change db01::/64 dev veth0
+
+
+ echo
+ echo "GLOBAL tests"
+ printf "%0.s-" {1..74}
+ echo
+
+ for t in $TESTS_GLOBAL
+ do
+ $t
+ done
+}
+
+bit2type=(
+ 0x800000 0x400000 0x200000 0x100000 0x080000 0x040000 0x020000 0x010000
+ 0x008000 0x004000 0x002000 0x001000 0x000800 0x000400 0x000200 0x000100
+ 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002
+)
+bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 )
+
+
+################################################################################
+# #
+# OUTPUT tests #
+# #
+# Two nodes (sender/receiver), IOAM disabled on ingress for the receiver. #
+################################################################################
+
+out_undef_ns()
+{
+ ##############################################################################
+ # Make sure that the encap node won't fill the trace if the chosen IOAM #
+ # namespace is not configured locally. #
+ ##############################################################################
+ local desc="Unknown IOAM namespace"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0x800000 ns 0 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0x800000 0
+}
+
+out_no_room()
+{
+ ##############################################################################
+ # Make sure that the encap node won't fill the trace and will set the #
+ # Overflow flag since there is no room enough for its data. #
+ ##############################################################################
+ local desc="Missing trace room"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0xc00000 ns 123 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0xc00000 123
+}
+
+out_bits()
+{
+ ##############################################################################
+ # Make sure that, for each trace type bit, the encap node will either: #
+ # (i) fill the trace with its data when it is a supported bit #
+ # (ii) not fill the trace with its data when it is an unsupported bit #
+ ##############################################################################
+ local desc="Trace type with bit <n> only"
+
+ local tmp=${bit2size[22]}
+ bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) ))
+
+ for i in {0..22}
+ do
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
+ prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0
+
+ run_test "out_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \
+ db01::2 db01::1 veth0 ${bit2type[$i]} 123
+ done
+
+ bit2size[22]=$tmp
+}
+
+out_full_supp_trace()
+{
+ ##############################################################################
+ # Make sure that the encap node will correctly fill a full trace. Be careful,#
+ # "full trace" here does NOT mean all bits (only supported ones). #
+ ##############################################################################
+ local desc="Full supported trace"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0xfff002 ns 123 size 100 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0xfff002 123
+}
+
+
+################################################################################
+# #
+# INPUT tests #
+# #
+# Two nodes (sender/receiver), the sender MUST NOT fill the trace upon #
+# insertion -> the IOAM namespace configured on the sender is removed #
+# and is used in the inserted trace to force the sender not to fill it. #
+################################################################################
+
+in_undef_ns()
+{
+ ##############################################################################
+ # Make sure that the receiving node won't fill the trace if the related IOAM #
+ # namespace is not configured locally. #
+ ##############################################################################
+ local desc="Unknown IOAM namespace"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0x800000 ns 0 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0x800000 0
+}
+
+in_no_room()
+{
+ ##############################################################################
+ # Make sure that the receiving node won't fill the trace and will set the #
+ # Overflow flag if there is no room enough for its data. #
+ ##############################################################################
+ local desc="Missing trace room"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0xc00000 ns 123 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0xc00000 123
+}
+
+in_bits()
+{
+ ##############################################################################
+ # Make sure that, for each trace type bit, the receiving node will either: #
+ # (i) fill the trace with its data when it is a supported bit #
+ # (ii) not fill the trace with its data when it is an unsupported bit #
+ ##############################################################################
+ local desc="Trace type with bit <n> only"
+
+ local tmp=${bit2size[22]}
+ bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) ))
+
+ for i in {0..22}
+ do
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
+ prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0
+
+ run_test "in_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \
+ db01::2 db01::1 veth0 ${bit2type[$i]} 123
+ done
+
+ bit2size[22]=$tmp
+}
+
+in_oflag()
+{
+ ##############################################################################
+ # Make sure that the receiving node won't fill the trace since the Overflow #
+ # flag is set. #
+ ##############################################################################
+ local desc="Overflow flag is set"
+
+ # Exception:
+ # Here, we need the sender to set the Overflow flag. For that, we will add
+ # back the IOAM namespace that was previously configured on the sender.
+ ip -netns ioam-node-alpha ioam namespace add 123
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0xc00000 ns 123 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0xc00000 123
+
+ # And we clean the exception for this test to get things back to normal for
+ # other INPUT tests
+ ip -netns ioam-node-alpha ioam namespace del 123
+}
+
+in_full_supp_trace()
+{
+ ##############################################################################
+ # Make sure that the receiving node will correctly fill a full trace. Be #
+ # careful, "full trace" here does NOT mean all bits (only supported ones). #
+ ##############################################################################
+ local desc="Full supported trace"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0xfff002 ns 123 size 80 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0xfff002 123
+}
+
+
+################################################################################
+# #
+# GLOBAL tests #
+# #
+# Three nodes (sender/router/receiver), IOAM fully enabled on every node. #
+################################################################################
+
+fwd_full_supp_trace()
+{
+ ##############################################################################
+ # Make sure that all three nodes correctly filled the full supported trace #
+ # by checking that the trace data is consistent with the predefined config. #
+ ##############################################################################
+ local desc="Forward - Full supported trace"
+
+ ip -netns ioam-node-alpha route change db02::/64 encap ioam6 trace prealloc \
+ type 0xfff002 ns 123 size 244 via db01::1 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-gamma db01::2 \
+ db02::2 veth0 0xfff002 123
+}
+
+
+################################################################################
+# #
+# MAIN #
+# #
+################################################################################
+
+if [ "$(id -u)" -ne 0 ]
+then
+ echo "SKIP: Need root privileges"
+ exit 1
+fi
+
+if [ ! -x "$(command -v ip)" ]
+then
+ echo "SKIP: Could not run test without ip tool"
+ exit 1
+fi
+
+ip ioam &>/dev/null
+if [ $? = 1 ]
+then
+ echo "SKIP: iproute2 too old, missing ioam command"
+ exit 1
+fi
+
+check_kernel_compatibility
+
+cleanup &>/dev/null
+setup
+run
+cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c
new file mode 100644
index 000000000000..d376cb2c383c
--- /dev/null
+++ b/tools/testing/selftests/net/ioam6_parser.c
@@ -0,0 +1,720 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Author: Justin Iurman (justin.iurman@uliege.be)
+ *
+ * IOAM tester for IPv6, see ioam6.sh for details on each test case.
+ */
+#include <arpa/inet.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/const.h>
+#include <linux/if_ether.h>
+#include <linux/ioam6.h>
+#include <linux/ipv6.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+struct ioam_config {
+ __u32 id;
+ __u64 wide;
+ __u16 ingr_id;
+ __u16 egr_id;
+ __u32 ingr_wide;
+ __u32 egr_wide;
+ __u32 ns_data;
+ __u64 ns_wide;
+ __u32 sc_id;
+ __u8 hlim;
+ char *sc_data;
+};
+
+/*
+ * Be careful if you modify structs below - everything MUST be kept synchronized
+ * with configurations inside ioam6.sh and always reflect the same.
+ */
+
+static struct ioam_config node1 = {
+ .id = 1,
+ .wide = 11111111,
+ .ingr_id = 0xffff, /* default value */
+ .egr_id = 101,
+ .ingr_wide = 0xffffffff, /* default value */
+ .egr_wide = 101101,
+ .ns_data = 0xdeadbee0,
+ .ns_wide = 0xcafec0caf00dc0de,
+ .sc_id = 777,
+ .sc_data = "something that will be 4n-aligned",
+ .hlim = 64,
+};
+
+static struct ioam_config node2 = {
+ .id = 2,
+ .wide = 22222222,
+ .ingr_id = 201,
+ .egr_id = 202,
+ .ingr_wide = 201201,
+ .egr_wide = 202202,
+ .ns_data = 0xdeadbee1,
+ .ns_wide = 0xcafec0caf11dc0de,
+ .sc_id = 666,
+ .sc_data = "Hello there -Obi",
+ .hlim = 63,
+};
+
+static struct ioam_config node3 = {
+ .id = 3,
+ .wide = 33333333,
+ .ingr_id = 301,
+ .egr_id = 0xffff, /* default value */
+ .ingr_wide = 301301,
+ .egr_wide = 0xffffffff, /* default value */
+ .ns_data = 0xdeadbee2,
+ .ns_wide = 0xcafec0caf22dc0de,
+ .sc_id = 0xffffff, /* default value */
+ .sc_data = NULL,
+ .hlim = 62,
+};
+
+enum {
+ /**********
+ * OUTPUT *
+ **********/
+ TEST_OUT_UNDEF_NS,
+ TEST_OUT_NO_ROOM,
+ TEST_OUT_BIT0,
+ TEST_OUT_BIT1,
+ TEST_OUT_BIT2,
+ TEST_OUT_BIT3,
+ TEST_OUT_BIT4,
+ TEST_OUT_BIT5,
+ TEST_OUT_BIT6,
+ TEST_OUT_BIT7,
+ TEST_OUT_BIT8,
+ TEST_OUT_BIT9,
+ TEST_OUT_BIT10,
+ TEST_OUT_BIT11,
+ TEST_OUT_BIT12,
+ TEST_OUT_BIT13,
+ TEST_OUT_BIT14,
+ TEST_OUT_BIT15,
+ TEST_OUT_BIT16,
+ TEST_OUT_BIT17,
+ TEST_OUT_BIT18,
+ TEST_OUT_BIT19,
+ TEST_OUT_BIT20,
+ TEST_OUT_BIT21,
+ TEST_OUT_BIT22,
+ TEST_OUT_FULL_SUPP_TRACE,
+
+ /*********
+ * INPUT *
+ *********/
+ TEST_IN_UNDEF_NS,
+ TEST_IN_NO_ROOM,
+ TEST_IN_OFLAG,
+ TEST_IN_BIT0,
+ TEST_IN_BIT1,
+ TEST_IN_BIT2,
+ TEST_IN_BIT3,
+ TEST_IN_BIT4,
+ TEST_IN_BIT5,
+ TEST_IN_BIT6,
+ TEST_IN_BIT7,
+ TEST_IN_BIT8,
+ TEST_IN_BIT9,
+ TEST_IN_BIT10,
+ TEST_IN_BIT11,
+ TEST_IN_BIT12,
+ TEST_IN_BIT13,
+ TEST_IN_BIT14,
+ TEST_IN_BIT15,
+ TEST_IN_BIT16,
+ TEST_IN_BIT17,
+ TEST_IN_BIT18,
+ TEST_IN_BIT19,
+ TEST_IN_BIT20,
+ TEST_IN_BIT21,
+ TEST_IN_BIT22,
+ TEST_IN_FULL_SUPP_TRACE,
+
+ /**********
+ * GLOBAL *
+ **********/
+ TEST_FWD_FULL_SUPP_TRACE,
+
+ __TEST_MAX,
+};
+
+static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h,
+ __u32 trace_type, __u16 ioam_ns)
+{
+ if (__be16_to_cpu(ioam6h->namespace_id) != ioam_ns ||
+ __be32_to_cpu(ioam6h->type_be32) != (trace_type << 8))
+ return 1;
+
+ switch (tid) {
+ case TEST_OUT_UNDEF_NS:
+ case TEST_IN_UNDEF_NS:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 1 ||
+ ioam6h->remlen != 1;
+
+ case TEST_OUT_NO_ROOM:
+ case TEST_IN_NO_ROOM:
+ case TEST_IN_OFLAG:
+ return !ioam6h->overflow ||
+ ioam6h->nodelen != 2 ||
+ ioam6h->remlen != 1;
+
+ case TEST_OUT_BIT0:
+ case TEST_IN_BIT0:
+ case TEST_OUT_BIT1:
+ case TEST_IN_BIT1:
+ case TEST_OUT_BIT2:
+ case TEST_IN_BIT2:
+ case TEST_OUT_BIT3:
+ case TEST_IN_BIT3:
+ case TEST_OUT_BIT4:
+ case TEST_IN_BIT4:
+ case TEST_OUT_BIT5:
+ case TEST_IN_BIT5:
+ case TEST_OUT_BIT6:
+ case TEST_IN_BIT6:
+ case TEST_OUT_BIT7:
+ case TEST_IN_BIT7:
+ case TEST_OUT_BIT11:
+ case TEST_IN_BIT11:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 1 ||
+ ioam6h->remlen;
+
+ case TEST_OUT_BIT8:
+ case TEST_IN_BIT8:
+ case TEST_OUT_BIT9:
+ case TEST_IN_BIT9:
+ case TEST_OUT_BIT10:
+ case TEST_IN_BIT10:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 2 ||
+ ioam6h->remlen;
+
+ case TEST_OUT_BIT12:
+ case TEST_IN_BIT12:
+ case TEST_OUT_BIT13:
+ case TEST_IN_BIT13:
+ case TEST_OUT_BIT14:
+ case TEST_IN_BIT14:
+ case TEST_OUT_BIT15:
+ case TEST_IN_BIT15:
+ case TEST_OUT_BIT16:
+ case TEST_IN_BIT16:
+ case TEST_OUT_BIT17:
+ case TEST_IN_BIT17:
+ case TEST_OUT_BIT18:
+ case TEST_IN_BIT18:
+ case TEST_OUT_BIT19:
+ case TEST_IN_BIT19:
+ case TEST_OUT_BIT20:
+ case TEST_IN_BIT20:
+ case TEST_OUT_BIT21:
+ case TEST_IN_BIT21:
+ return ioam6h->overflow ||
+ ioam6h->nodelen ||
+ ioam6h->remlen != 1;
+
+ case TEST_OUT_BIT22:
+ case TEST_IN_BIT22:
+ return ioam6h->overflow ||
+ ioam6h->nodelen ||
+ ioam6h->remlen;
+
+ case TEST_OUT_FULL_SUPP_TRACE:
+ case TEST_IN_FULL_SUPP_TRACE:
+ case TEST_FWD_FULL_SUPP_TRACE:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 15 ||
+ ioam6h->remlen;
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h,
+ const struct ioam_config cnf)
+{
+ unsigned int len;
+ __u8 aligned;
+ __u64 raw64;
+ __u32 raw32;
+
+ if (ioam6h->type.bit0) {
+ raw32 = __be32_to_cpu(*((__u32 *)*p));
+ if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff))
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit1) {
+ raw32 = __be32_to_cpu(*((__u32 *)*p));
+ if (cnf.ingr_id != (raw32 >> 16) ||
+ cnf.egr_id != (raw32 & 0xffff))
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit2)
+ *p += sizeof(__u32);
+
+ if (ioam6h->type.bit3)
+ *p += sizeof(__u32);
+
+ if (ioam6h->type.bit4) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit5) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ns_data)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit6) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit7) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit8) {
+ raw64 = __be64_to_cpu(*((__u64 *)*p));
+ if (cnf.hlim != (raw64 >> 56) ||
+ cnf.wide != (raw64 & 0xffffffffffffff))
+ return 1;
+ *p += sizeof(__u64);
+ }
+
+ if (ioam6h->type.bit9) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ingr_wide)
+ return 1;
+ *p += sizeof(__u32);
+
+ if (__be32_to_cpu(*((__u32 *)*p)) != cnf.egr_wide)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit10) {
+ if (__be64_to_cpu(*((__u64 *)*p)) != cnf.ns_wide)
+ return 1;
+ *p += sizeof(__u64);
+ }
+
+ if (ioam6h->type.bit11) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit22) {
+ len = cnf.sc_data ? strlen(cnf.sc_data) : 0;
+ aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0;
+
+ raw32 = __be32_to_cpu(*((__u32 *)*p));
+ if (aligned != (raw32 >> 24) * 4 ||
+ cnf.sc_id != (raw32 & 0xffffff))
+ return 1;
+ *p += sizeof(__u32);
+
+ if (cnf.sc_data) {
+ if (strncmp((char *)*p, cnf.sc_data, len))
+ return 1;
+
+ *p += len;
+ aligned -= len;
+
+ while (aligned--) {
+ if (**p != '\0')
+ return 1;
+ *p += sizeof(__u8);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int check_ioam_header_and_data(int tid, struct ioam6_trace_hdr *ioam6h,
+ __u32 trace_type, __u16 ioam_ns)
+{
+ __u8 *p;
+
+ if (check_ioam_header(tid, ioam6h, trace_type, ioam_ns))
+ return 1;
+
+ p = ioam6h->data + ioam6h->remlen * 4;
+
+ switch (tid) {
+ case TEST_OUT_BIT0:
+ case TEST_OUT_BIT1:
+ case TEST_OUT_BIT2:
+ case TEST_OUT_BIT3:
+ case TEST_OUT_BIT4:
+ case TEST_OUT_BIT5:
+ case TEST_OUT_BIT6:
+ case TEST_OUT_BIT7:
+ case TEST_OUT_BIT8:
+ case TEST_OUT_BIT9:
+ case TEST_OUT_BIT10:
+ case TEST_OUT_BIT11:
+ case TEST_OUT_BIT22:
+ case TEST_OUT_FULL_SUPP_TRACE:
+ return check_ioam6_data(&p, ioam6h, node1);
+
+ case TEST_IN_BIT0:
+ case TEST_IN_BIT1:
+ case TEST_IN_BIT2:
+ case TEST_IN_BIT3:
+ case TEST_IN_BIT4:
+ case TEST_IN_BIT5:
+ case TEST_IN_BIT6:
+ case TEST_IN_BIT7:
+ case TEST_IN_BIT8:
+ case TEST_IN_BIT9:
+ case TEST_IN_BIT10:
+ case TEST_IN_BIT11:
+ case TEST_IN_BIT22:
+ case TEST_IN_FULL_SUPP_TRACE:
+ {
+ __u32 tmp32 = node2.egr_wide;
+ __u16 tmp16 = node2.egr_id;
+ int res;
+
+ node2.egr_id = 0xffff;
+ node2.egr_wide = 0xffffffff;
+
+ res = check_ioam6_data(&p, ioam6h, node2);
+
+ node2.egr_id = tmp16;
+ node2.egr_wide = tmp32;
+
+ return res;
+ }
+
+ case TEST_FWD_FULL_SUPP_TRACE:
+ if (check_ioam6_data(&p, ioam6h, node3))
+ return 1;
+ if (check_ioam6_data(&p, ioam6h, node2))
+ return 1;
+ return check_ioam6_data(&p, ioam6h, node1);
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+static int str2id(const char *tname)
+{
+ if (!strcmp("out_undef_ns", tname))
+ return TEST_OUT_UNDEF_NS;
+ if (!strcmp("out_no_room", tname))
+ return TEST_OUT_NO_ROOM;
+ if (!strcmp("out_bit0", tname))
+ return TEST_OUT_BIT0;
+ if (!strcmp("out_bit1", tname))
+ return TEST_OUT_BIT1;
+ if (!strcmp("out_bit2", tname))
+ return TEST_OUT_BIT2;
+ if (!strcmp("out_bit3", tname))
+ return TEST_OUT_BIT3;
+ if (!strcmp("out_bit4", tname))
+ return TEST_OUT_BIT4;
+ if (!strcmp("out_bit5", tname))
+ return TEST_OUT_BIT5;
+ if (!strcmp("out_bit6", tname))
+ return TEST_OUT_BIT6;
+ if (!strcmp("out_bit7", tname))
+ return TEST_OUT_BIT7;
+ if (!strcmp("out_bit8", tname))
+ return TEST_OUT_BIT8;
+ if (!strcmp("out_bit9", tname))
+ return TEST_OUT_BIT9;
+ if (!strcmp("out_bit10", tname))
+ return TEST_OUT_BIT10;
+ if (!strcmp("out_bit11", tname))
+ return TEST_OUT_BIT11;
+ if (!strcmp("out_bit12", tname))
+ return TEST_OUT_BIT12;
+ if (!strcmp("out_bit13", tname))
+ return TEST_OUT_BIT13;
+ if (!strcmp("out_bit14", tname))
+ return TEST_OUT_BIT14;
+ if (!strcmp("out_bit15", tname))
+ return TEST_OUT_BIT15;
+ if (!strcmp("out_bit16", tname))
+ return TEST_OUT_BIT16;
+ if (!strcmp("out_bit17", tname))
+ return TEST_OUT_BIT17;
+ if (!strcmp("out_bit18", tname))
+ return TEST_OUT_BIT18;
+ if (!strcmp("out_bit19", tname))
+ return TEST_OUT_BIT19;
+ if (!strcmp("out_bit20", tname))
+ return TEST_OUT_BIT20;
+ if (!strcmp("out_bit21", tname))
+ return TEST_OUT_BIT21;
+ if (!strcmp("out_bit22", tname))
+ return TEST_OUT_BIT22;
+ if (!strcmp("out_full_supp_trace", tname))
+ return TEST_OUT_FULL_SUPP_TRACE;
+ if (!strcmp("in_undef_ns", tname))
+ return TEST_IN_UNDEF_NS;
+ if (!strcmp("in_no_room", tname))
+ return TEST_IN_NO_ROOM;
+ if (!strcmp("in_oflag", tname))
+ return TEST_IN_OFLAG;
+ if (!strcmp("in_bit0", tname))
+ return TEST_IN_BIT0;
+ if (!strcmp("in_bit1", tname))
+ return TEST_IN_BIT1;
+ if (!strcmp("in_bit2", tname))
+ return TEST_IN_BIT2;
+ if (!strcmp("in_bit3", tname))
+ return TEST_IN_BIT3;
+ if (!strcmp("in_bit4", tname))
+ return TEST_IN_BIT4;
+ if (!strcmp("in_bit5", tname))
+ return TEST_IN_BIT5;
+ if (!strcmp("in_bit6", tname))
+ return TEST_IN_BIT6;
+ if (!strcmp("in_bit7", tname))
+ return TEST_IN_BIT7;
+ if (!strcmp("in_bit8", tname))
+ return TEST_IN_BIT8;
+ if (!strcmp("in_bit9", tname))
+ return TEST_IN_BIT9;
+ if (!strcmp("in_bit10", tname))
+ return TEST_IN_BIT10;
+ if (!strcmp("in_bit11", tname))
+ return TEST_IN_BIT11;
+ if (!strcmp("in_bit12", tname))
+ return TEST_IN_BIT12;
+ if (!strcmp("in_bit13", tname))
+ return TEST_IN_BIT13;
+ if (!strcmp("in_bit14", tname))
+ return TEST_IN_BIT14;
+ if (!strcmp("in_bit15", tname))
+ return TEST_IN_BIT15;
+ if (!strcmp("in_bit16", tname))
+ return TEST_IN_BIT16;
+ if (!strcmp("in_bit17", tname))
+ return TEST_IN_BIT17;
+ if (!strcmp("in_bit18", tname))
+ return TEST_IN_BIT18;
+ if (!strcmp("in_bit19", tname))
+ return TEST_IN_BIT19;
+ if (!strcmp("in_bit20", tname))
+ return TEST_IN_BIT20;
+ if (!strcmp("in_bit21", tname))
+ return TEST_IN_BIT21;
+ if (!strcmp("in_bit22", tname))
+ return TEST_IN_BIT22;
+ if (!strcmp("in_full_supp_trace", tname))
+ return TEST_IN_FULL_SUPP_TRACE;
+ if (!strcmp("fwd_full_supp_trace", tname))
+ return TEST_FWD_FULL_SUPP_TRACE;
+
+ return -1;
+}
+
+static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2)
+{
+ return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) |
+ (a1->s6_addr32[1] ^ a2->s6_addr32[1]) |
+ (a1->s6_addr32[2] ^ a2->s6_addr32[2]) |
+ (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0;
+}
+
+static int get_u32(__u32 *val, const char *arg, int base)
+{
+ unsigned long res;
+ char *ptr;
+
+ if (!arg || !*arg)
+ return -1;
+ res = strtoul(arg, &ptr, base);
+
+ if (!ptr || ptr == arg || *ptr)
+ return -1;
+
+ if (res == ULONG_MAX && errno == ERANGE)
+ return -1;
+
+ if (res > 0xFFFFFFFFUL)
+ return -1;
+
+ *val = res;
+ return 0;
+}
+
+static int get_u16(__u16 *val, const char *arg, int base)
+{
+ unsigned long res;
+ char *ptr;
+
+ if (!arg || !*arg)
+ return -1;
+ res = strtoul(arg, &ptr, base);
+
+ if (!ptr || ptr == arg || *ptr)
+ return -1;
+
+ if (res == ULONG_MAX && errno == ERANGE)
+ return -1;
+
+ if (res > 0xFFFFUL)
+ return -1;
+
+ *val = res;
+ return 0;
+}
+
+static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = {
+ [TEST_OUT_UNDEF_NS] = check_ioam_header,
+ [TEST_OUT_NO_ROOM] = check_ioam_header,
+ [TEST_OUT_BIT0] = check_ioam_header_and_data,
+ [TEST_OUT_BIT1] = check_ioam_header_and_data,
+ [TEST_OUT_BIT2] = check_ioam_header_and_data,
+ [TEST_OUT_BIT3] = check_ioam_header_and_data,
+ [TEST_OUT_BIT4] = check_ioam_header_and_data,
+ [TEST_OUT_BIT5] = check_ioam_header_and_data,
+ [TEST_OUT_BIT6] = check_ioam_header_and_data,
+ [TEST_OUT_BIT7] = check_ioam_header_and_data,
+ [TEST_OUT_BIT8] = check_ioam_header_and_data,
+ [TEST_OUT_BIT9] = check_ioam_header_and_data,
+ [TEST_OUT_BIT10] = check_ioam_header_and_data,
+ [TEST_OUT_BIT11] = check_ioam_header_and_data,
+ [TEST_OUT_BIT12] = check_ioam_header,
+ [TEST_OUT_BIT13] = check_ioam_header,
+ [TEST_OUT_BIT14] = check_ioam_header,
+ [TEST_OUT_BIT15] = check_ioam_header,
+ [TEST_OUT_BIT16] = check_ioam_header,
+ [TEST_OUT_BIT17] = check_ioam_header,
+ [TEST_OUT_BIT18] = check_ioam_header,
+ [TEST_OUT_BIT19] = check_ioam_header,
+ [TEST_OUT_BIT20] = check_ioam_header,
+ [TEST_OUT_BIT21] = check_ioam_header,
+ [TEST_OUT_BIT22] = check_ioam_header_and_data,
+ [TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data,
+ [TEST_IN_UNDEF_NS] = check_ioam_header,
+ [TEST_IN_NO_ROOM] = check_ioam_header,
+ [TEST_IN_OFLAG] = check_ioam_header,
+ [TEST_IN_BIT0] = check_ioam_header_and_data,
+ [TEST_IN_BIT1] = check_ioam_header_and_data,
+ [TEST_IN_BIT2] = check_ioam_header_and_data,
+ [TEST_IN_BIT3] = check_ioam_header_and_data,
+ [TEST_IN_BIT4] = check_ioam_header_and_data,
+ [TEST_IN_BIT5] = check_ioam_header_and_data,
+ [TEST_IN_BIT6] = check_ioam_header_and_data,
+ [TEST_IN_BIT7] = check_ioam_header_and_data,
+ [TEST_IN_BIT8] = check_ioam_header_and_data,
+ [TEST_IN_BIT9] = check_ioam_header_and_data,
+ [TEST_IN_BIT10] = check_ioam_header_and_data,
+ [TEST_IN_BIT11] = check_ioam_header_and_data,
+ [TEST_IN_BIT12] = check_ioam_header,
+ [TEST_IN_BIT13] = check_ioam_header,
+ [TEST_IN_BIT14] = check_ioam_header,
+ [TEST_IN_BIT15] = check_ioam_header,
+ [TEST_IN_BIT16] = check_ioam_header,
+ [TEST_IN_BIT17] = check_ioam_header,
+ [TEST_IN_BIT18] = check_ioam_header,
+ [TEST_IN_BIT19] = check_ioam_header,
+ [TEST_IN_BIT20] = check_ioam_header,
+ [TEST_IN_BIT21] = check_ioam_header,
+ [TEST_IN_BIT22] = check_ioam_header_and_data,
+ [TEST_IN_FULL_SUPP_TRACE] = check_ioam_header_and_data,
+ [TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data,
+};
+
+int main(int argc, char **argv)
+{
+ int fd, size, hoplen, tid, ret = 1;
+ struct in6_addr src, dst;
+ struct ioam6_hdr *opt;
+ struct ipv6hdr *ip6h;
+ __u8 buffer[400], *p;
+ __u16 ioam_ns;
+ __u32 tr_type;
+
+ if (argc != 7)
+ goto out;
+
+ tid = str2id(argv[2]);
+ if (tid < 0 || !func[tid])
+ goto out;
+
+ if (inet_pton(AF_INET6, argv[3], &src) != 1 ||
+ inet_pton(AF_INET6, argv[4], &dst) != 1)
+ goto out;
+
+ if (get_u32(&tr_type, argv[5], 16) ||
+ get_u16(&ioam_ns, argv[6], 0))
+ goto out;
+
+ fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6));
+ if (!fd)
+ goto out;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+ argv[1], strlen(argv[1])))
+ goto close;
+
+recv:
+ size = recv(fd, buffer, sizeof(buffer), 0);
+ if (size <= 0)
+ goto close;
+
+ ip6h = (struct ipv6hdr *)buffer;
+
+ if (!ipv6_addr_equal(&ip6h->saddr, &src) ||
+ !ipv6_addr_equal(&ip6h->daddr, &dst))
+ goto recv;
+
+ if (ip6h->nexthdr != IPPROTO_HOPOPTS)
+ goto close;
+
+ p = buffer + sizeof(*ip6h);
+ hoplen = (p[1] + 1) << 3;
+ p += sizeof(struct ipv6_hopopt_hdr);
+
+ while (hoplen > 0) {
+ opt = (struct ioam6_hdr *)p;
+
+ if (opt->opt_type == IPV6_TLV_IOAM &&
+ opt->type == IOAM6_TYPE_PREALLOC) {
+ p += sizeof(*opt);
+ ret = func[tid](tid, (struct ioam6_trace_hdr *)p,
+ tr_type, ioam_ns);
+ break;
+ }
+
+ p += opt->opt_len + 2;
+ hoplen -= opt->opt_len + 2;
+ }
+close:
+ close(fd);
+out:
+ return ret;
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index f02f4de2f3a0..255793c5ac4f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -3,8 +3,10 @@
ret=0
sin=""
+sinfail=""
sout=""
cin=""
+cinfail=""
cinsent=""
cout=""
ksft_skip=4
@@ -76,6 +78,14 @@ init()
done
}
+init_shapers()
+{
+ for i in `seq 1 4`; do
+ tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1
+ tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1
+ done
+}
+
cleanup_partial()
{
rm -f "$capout"
@@ -88,8 +98,8 @@ cleanup_partial()
cleanup()
{
- rm -f "$cin" "$cout"
- rm -f "$sin" "$sout" "$cinsent"
+ rm -f "$cin" "$cout" "$sinfail"
+ rm -f "$sin" "$sout" "$cinsent" "$cinfail"
cleanup_partial
}
@@ -211,11 +221,15 @@ link_failure()
{
ns="$1"
- l=$((RANDOM%4))
- l=$((l+1))
+ if [ -z "$FAILING_LINKS" ]; then
+ l=$((RANDOM%4))
+ FAILING_LINKS=$((l+1))
+ fi
- veth="ns1eth$l"
- ip -net "$ns" link set "$veth" down
+ for l in $FAILING_LINKS; do
+ veth="ns1eth$l"
+ ip -net "$ns" link set "$veth" down
+ done
}
# $1: IP address
@@ -280,10 +294,17 @@ do_transfer()
local_addr="0.0.0.0"
fi
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
- ${local_addr} < "$sin" > "$sout" &
+ if [ "$test_link_fail" -eq 2 ];then
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ $mptcp_connect -t ${timeout_poll} -l -p $port -s ${cl_proto} \
+ ${local_addr} < "$sinfail" > "$sout" &
+ else
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ ${local_addr} < "$sin" > "$sout" &
+ fi
spid=$!
sleep 1
@@ -294,7 +315,7 @@ do_transfer()
$mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
$connect_addr < "$cin" > "$cout" &
else
- ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | \
+ ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
tee "$cinsent" | \
timeout ${timeout_test} \
ip netns exec ${connector_ns} \
@@ -323,17 +344,18 @@ do_transfer()
let rm_nr_ns1=-addr_nr_ns1
if [ $rm_nr_ns1 -lt 8 ]; then
counter=1
+ pos=1
dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
if [ ${#dump[@]} -gt 0 ]; then
- id=${dump[1]}
sleep 1
while [ $counter -le $rm_nr_ns1 ]
do
+ id=${dump[$pos]}
ip netns exec ${listener_ns} ./pm_nl_ctl del $id
sleep 1
let counter+=1
- let id+=1
+ let pos+=5
done
fi
elif [ $rm_nr_ns1 -eq 8 ]; then
@@ -345,6 +367,12 @@ do_transfer()
fi
fi
+ flags="subflow"
+ if [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then
+ flags="${flags},fullmesh"
+ addr_nr_ns2=${addr_nr_ns2:9}
+ fi
+
if [ $addr_nr_ns2 -gt 0 ]; then
let add_nr_ns2=addr_nr_ns2
counter=3
@@ -356,7 +384,7 @@ do_transfer()
else
addr="10.0.$counter.2"
fi
- ip netns exec $ns2 ./pm_nl_ctl add $addr flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add $addr flags $flags
let counter+=1
let add_nr_ns2-=1
done
@@ -365,17 +393,18 @@ do_transfer()
let rm_nr_ns2=-addr_nr_ns2
if [ $rm_nr_ns2 -lt 8 ]; then
counter=1
+ pos=1
dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
if [ ${#dump[@]} -gt 0 ]; then
- id=${dump[1]}
sleep 1
while [ $counter -le $rm_nr_ns2 ]
do
+ id=${dump[$pos]}
ip netns exec ${connector_ns} ./pm_nl_ctl del $id
sleep 1
let counter+=1
- let id+=1
+ let pos+=5
done
fi
elif [ $rm_nr_ns2 -eq 8 ]; then
@@ -434,7 +463,11 @@ do_transfer()
return 1
fi
- check_transfer $sin $cout "file received by client"
+ if [ "$test_link_fail" -eq 2 ];then
+ check_transfer $sinfail $cout "file received by client"
+ else
+ check_transfer $sin $cout "file received by client"
+ fi
retc=$?
if [ "$test_link_fail" -eq 0 ];then
check_transfer $cin $sout "file received by server"
@@ -477,29 +510,33 @@ run_tests()
lret=0
oldin=""
- if [ "$test_linkfail" -eq 1 ];then
- size=$((RANDOM%1024))
+ # create the input file for the failure test when
+ # the first failure test run
+ if [ "$test_linkfail" -ne 0 -a -z "$cinfail" ]; then
+ # the client file must be considerably larger
+ # of the maximum expected cwin value, or the
+ # link utilization will be not predicable
+ size=$((RANDOM%2))
size=$((size+1))
- size=$((size*128))
+ size=$((size*8192))
+ size=$((size + ( $RANDOM % 8192) ))
- oldin=$(mktemp)
- cp "$cin" "$oldin"
- make_file "$cin" "client" $size
+ cinfail=$(mktemp)
+ make_file "$cinfail" "client" $size
fi
- do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
- ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup}
- lret=$?
+ if [ "$test_linkfail" -eq 2 -a -z "$sinfail" ]; then
+ size=$((RANDOM%16))
+ size=$((size+1))
+ size=$((size*2048))
- if [ "$test_linkfail" -eq 1 ];then
- cp "$oldin" "$cin"
- rm -f "$oldin"
+ sinfail=$(mktemp)
+ make_file "$sinfail" "server" $size
fi
- if [ $lret -ne 0 ]; then
- ret=$lret
- return
- fi
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
+ ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup}
+ lret=$?
}
chk_csum_nr()
@@ -541,6 +578,43 @@ chk_csum_nr()
fi
}
+chk_fail_nr()
+{
+ local mp_fail_nr_tx=$1
+ local mp_fail_nr_rx=$2
+ local count
+ local dump_stats
+
+ printf "%-39s %s" " " "ftx"
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$mp_fail_nr_tx" ]; then
+ echo "[fail] got $count MP_FAIL[s] TX expected $mp_fail_nr_tx"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - frx "
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$mp_fail_nr_rx" ]; then
+ echo "[fail] got $count MP_FAIL[s] RX expected $mp_fail_nr_rx"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
+ fi
+}
+
chk_join_nr()
{
local msg="$1"
@@ -590,6 +664,47 @@ chk_join_nr()
fi
if [ $checksum -eq 1 ]; then
chk_csum_nr
+ chk_fail_nr 0 0
+ fi
+}
+
+# a negative value for 'stale_max' means no upper bound:
+# for bidirectional transfer, if one peer sleep for a while
+# - as these tests do - we can have a quite high number of
+# stale/recover conversions, proportional to
+# sleep duration/ MPTCP-level RTX interval.
+chk_stale_nr()
+{
+ local ns=$1
+ local stale_min=$2
+ local stale_max=$3
+ local stale_delta=$4
+ local dump_stats
+ local stale_nr
+ local recover_nr
+
+ printf "%-39s %-18s" " " "stale"
+ stale_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowStale | awk '{print $2}'`
+ [ -z "$stale_nr" ] && stale_nr=0
+ recover_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowRecover | awk '{print $2}'`
+ [ -z "$recover_nr" ] && recover_nr=0
+
+ if [ $stale_nr -lt $stale_min ] ||
+ [ $stale_max -gt 0 -a $stale_nr -gt $stale_max ] ||
+ [ $((stale_nr - $recover_nr)) -ne $stale_delta ]; then
+ echo "[fail] got $stale_nr stale[s] $recover_nr recover[s], " \
+ " expected stale in range [$stale_min..$stale_max]," \
+ " stale-recover delta $stale_delta "
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ if [ "${dump_stats}" = 1 ]; then
+ echo $ns stats
+ ip netns exec $ns ip -s link show
+ ip netns exec $ns nstat -as | grep MPTcp
fi
}
@@ -801,6 +916,27 @@ chk_prio_nr()
fi
}
+chk_link_usage()
+{
+ local ns=$1
+ local link=$2
+ local out=$3
+ local expected_rate=$4
+ local tx_link=`ip netns exec $ns cat /sys/class/net/$link/statistics/tx_bytes`
+ local tx_total=`ls -l $out | awk '{print $5}'`
+ local tx_rate=$((tx_link * 100 / $tx_total))
+ local tolerance=5
+
+ printf "%-39s %-18s" " " "link usage"
+ if [ $tx_rate -lt $((expected_rate - $tolerance)) -o \
+ $tx_rate -gt $((expected_rate + $tolerance)) ]; then
+ echo "[fail] got $tx_rate% usage, expected $expected_rate%"
+ ret=1
+ else
+ echo "[ ok ]"
+ fi
+}
+
subflows_tests()
{
reset
@@ -918,20 +1054,101 @@ signal_address_tests()
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal invalid addresses" 1 1 1
chk_add_nr 3 3
+
+ # signal addresses race test
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 4 4
+ ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_add_nr 4 4
}
link_failure_tests()
{
# accept and use add_addr with additional subflows and link loss
reset
+
+ # without any b/w limit each veth could spool the packets and get
+ # them acked at xmit time, so that the corresponding subflow will
+ # have almost always no outstanding pkts, the scheduler will pick
+ # always the first subflow and we will have hard time testing
+ # active backup and link switch-over.
+ # Let's set some arbitrary (low) virtual link limits.
+ init_shapers
ip netns exec $ns1 ./pm_nl_ctl limits 0 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 3
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
run_tests $ns1 $ns2 10.0.1.1 1
chk_join_nr "multiple flows, signal, link failure" 3 3 3
chk_add_nr 1 1
+ chk_stale_nr $ns2 1 5 1
+
+ # accept and use add_addr with additional subflows and link loss
+ # for bidirectional transfer
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 2
+ chk_join_nr "multi flows, signal, bidi, link fail" 3 3 3
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 1 -1 1
+
+ # 2 subflows plus 1 backup subflow with a lossy link, backup
+ # will never be used
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ export FAILING_LINKS="1"
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+ run_tests $ns1 $ns2 10.0.1.1 1
+ chk_join_nr "backup subflow unused, link failure" 2 2 2
+ chk_add_nr 1 1
+ chk_link_usage $ns2 ns2eth3 $cinsent 0
+
+ # 2 lossy links after half transfer, backup will get half of
+ # the traffic
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+ export FAILING_LINKS="1 2"
+ run_tests $ns1 $ns2 10.0.1.1 1
+ chk_join_nr "backup flow used, multi links fail" 2 2 2
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 2 4 2
+ chk_link_usage $ns2 ns2eth3 $cinsent 50
+
+ # use a backup subflow with the first subflow on a lossy link
+ # for bidirectional transfer
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+ run_tests $ns1 $ns2 10.0.1.1 2
+ chk_join_nr "backup flow used, bidi, link failure" 2 2 2
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 1 -1 2
+ chk_link_usage $ns2 ns2eth3 $cinsent 50
}
add_addr_timeout_tests()
@@ -1530,6 +1747,55 @@ deny_join_id0_tests()
chk_join_nr "subflow and address allow join id0 2" 1 1 1
}
+fullmesh_tests()
+{
+ # fullmesh 1
+ # 2 fullmesh addrs in ns2, added before the connection,
+ # 1 non-fullmesh addr in ns1, added during the connection.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 4
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 4
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,fullmesh
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,fullmesh
+ run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
+ chk_join_nr "fullmesh test 2x1" 4 4 4
+ chk_add_nr 1 1
+
+ # fullmesh 2
+ # 1 non-fullmesh addr in ns1, added before the connection,
+ # 1 fullmesh addr in ns2, added during the connection.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow
+ chk_join_nr "fullmesh test 1x1" 3 3 3
+ chk_add_nr 1 1
+
+ # fullmesh 3
+ # 1 non-fullmesh addr in ns1, added before the connection,
+ # 2 fullmesh addrs in ns2, added during the connection.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 5
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 5
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+ chk_join_nr "fullmesh test 1x2" 5 5 5
+ chk_add_nr 1 1
+
+ # fullmesh 4
+ # 1 non-fullmesh addr in ns1, added before the connection,
+ # 2 fullmesh addrs in ns2, added during the connection,
+ # limit max_subflows to 4.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 4
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 4
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+ chk_join_nr "fullmesh test 1x2, limited" 4 4 4
+ chk_add_nr 1 1
+}
+
all_tests()
{
subflows_tests
@@ -1545,6 +1811,7 @@ all_tests()
syncookies_tests
checksum_tests
deny_join_id0_tests
+ fullmesh_tests
}
usage()
@@ -1563,6 +1830,7 @@ usage()
echo " -k syncookies_tests"
echo " -S checksum_tests"
echo " -d deny_join_id0_tests"
+ echo " -m fullmesh_tests"
echo " -c capture pcap files"
echo " -C enable data checksum"
echo " -h help"
@@ -1598,7 +1866,7 @@ if [ $do_all_tests -eq 1 ]; then
exit $ret
fi
-while getopts 'fsltra64bpkdchCS' opt; do
+while getopts 'fsltra64bpkdmchCS' opt; do
case $opt in
f)
subflows_tests
@@ -1639,6 +1907,9 @@ while getopts 'fsltra64bpkdchCS' opt; do
d)
deny_join_id0_tests
;;
+ m)
+ fullmesh_tests
+ ;;
c)
;;
C)
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 115decfdc1ef..354784512748 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -25,7 +25,7 @@
static void syntax(char *argv[])
{
fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
- fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
+ fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
fprintf(stderr, "\tdel <id> [<ip>]\n");
fprintf(stderr, "\tget <id>\n");
fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n");
@@ -236,11 +236,18 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
flags |= MPTCP_PM_ADDR_FLAG_SIGNAL;
else if (!strcmp(tok, "backup"))
flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else if (!strcmp(tok, "fullmesh"))
+ flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
else
error(1, errno,
"unknown flag %s", argv[arg]);
}
+ if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
+ flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+ error(1, errno, "error flag fullmesh");
+ }
+
rta = (void *)(data + off);
rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
rta->rta_len = RTA_LENGTH(4);
@@ -422,6 +429,13 @@ static void print_addr(struct rtattr *attrs, int len)
printf(",");
}
+ if (flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+ printf("fullmesh");
+ flags &= ~MPTCP_PM_ADDR_FLAG_FULLMESH;
+ if (flags)
+ printf(",");
+ }
+
/* bump unknown flags, if any */
if (flags)
printf("0x%x", flags);
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index db4521335722..3653d6468c67 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -111,8 +111,8 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
static void sock_fanout_set_cbpf(int fd)
{
struct sock_filter bpf_filter[] = {
- BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80), /* ldb [80] */
- BPF_STMT(BPF_RET+BPF_A, 0), /* ret A */
+ BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 80), /* ldb [80] */
+ BPF_STMT(BPF_RET | BPF_A, 0), /* ret A */
};
struct sock_fprog bpf_prog;
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
index 170be65e0816..1cbfeb5052ec 100755
--- a/tools/testing/selftests/net/psock_snd.sh
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -86,9 +86,6 @@ echo "raw truncate hlen - 1 (expected to fail: EINVAL)"
echo "raw gso min size"
./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
-echo "raw gso min size - 1 (expected to fail)"
-(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
-
echo "raw gso max size"
./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
index 8b42e8b04e0f..a59cb6a3c4f5 100755
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -1,9 +1,12 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
if [ $(id -u) != 0 ]; then
echo $msg must be run as root >&2
- exit 0
+ exit $ksft_skip
fi
ret=0
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
new file mode 100755
index 000000000000..e57bbfbc5208
--- /dev/null
+++ b/tools/testing/selftests/net/setup_loopback.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
+readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
+readonly HARD_IRQS="$(< ${IRQ_PATH})"
+
+netdev_check_for_carrier() {
+ local -r dev="$1"
+
+ for i in {1..5}; do
+ carrier="$(cat /sys/class/net/${dev}/carrier)"
+ if [[ "${carrier}" -ne 1 ]] ; then
+ echo "carrier not ready yet..." >&2
+ sleep 1
+ else
+ echo "carrier ready" >&2
+ break
+ fi
+ done
+ echo "${carrier}"
+}
+
+# Assumes that there is no existing ipvlan device on the physical device
+setup_loopback_environment() {
+ local dev="$1"
+
+ # Fail hard if cannot turn on loopback mode for current NIC
+ ethtool -K "${dev}" loopback on || exit 1
+ sleep 1
+
+ # Check for the carrier
+ carrier=$(netdev_check_for_carrier ${dev})
+ if [[ "${carrier}" -ne 1 ]] ; then
+ echo "setup_loopback_environment failed"
+ exit 1
+ fi
+}
+
+setup_macvlan_ns(){
+ local -r link_dev="$1"
+ local -r ns_name="$2"
+ local -r ns_dev="$3"
+ local -r ns_mac="$4"
+ local -r addr="$5"
+
+ ip link add link "${link_dev}" dev "${ns_dev}" \
+ address "${ns_mac}" type macvlan
+ exit_code=$?
+ if [[ "${exit_code}" -ne 0 ]]; then
+ echo "setup_macvlan_ns failed"
+ exit $exit_code
+ fi
+
+ [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+ ip link set dev "${ns_dev}" netns "${ns_name}"
+ ip -netns "${ns_name}" link set dev "${ns_dev}" up
+ if [[ -n "${addr}" ]]; then
+ ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
+ fi
+
+ sleep 1
+}
+
+cleanup_macvlan_ns(){
+ while (( $# >= 2 )); do
+ ns_name="$1"
+ ns_dev="$2"
+ ip -netns "${ns_name}" link del dev "${ns_dev}"
+ ip netns del "${ns_name}"
+ shift 2
+ done
+}
+
+cleanup_loopback(){
+ local -r dev="$1"
+
+ ethtool -K "${dev}" loopback off
+ sleep 1
+
+ # Check for the carrier
+ carrier=$(netdev_check_for_carrier ${dev})
+ if [[ "${carrier}" -ne 1 ]] ; then
+ echo "setup_loopback_environment failed"
+ exit 1
+ fi
+}
+
+setup_interrupt() {
+ # Use timer on host to trigger the network stack
+ # Also disable device interrupt to not depend on NIC interrupt
+ # Reduce test flakiness caused by unexpected interrupts
+ echo 100000 >"${FLUSH_PATH}"
+ echo 50 >"${IRQ_PATH}"
+}
+
+setup_ns() {
+ # Set up server_ns namespace and client_ns namespace
+ setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}"
+ setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+ cleanup_macvlan_ns server_ns server client_ns client
+}
+
+setup() {
+ setup_loopback_environment "${dev}"
+ setup_interrupt
+}
+
+cleanup() {
+ cleanup_loopback "${dev}"
+
+ echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+ echo "${HARD_IRQS}" >"${IRQ_PATH}"
+}
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
new file mode 100644
index 000000000000..1003ddf7b3b2
--- /dev/null
+++ b/tools/testing/selftests/net/setup_veth.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+setup_veth_ns() {
+ local -r link_dev="$1"
+ local -r ns_name="$2"
+ local -r ns_dev="$3"
+ local -r ns_mac="$4"
+
+ [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+ echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
+ ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
+ ip -netns "${ns_name}" link set dev "${ns_dev}" up
+
+ ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off
+}
+
+setup_ns() {
+ # Set up server_ns namespace and client_ns namespace
+ ip link add name server type veth peer name client
+
+ setup_veth_ns "${dev}" server_ns server "${SERVER_MAC}"
+ setup_veth_ns "${dev}" client_ns client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+ local ns_name
+
+ for ns_name in client_ns server_ns; do
+ [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}"
+ done
+}
+
+setup() {
+ # no global init setup step needed
+ :
+}
+
+cleanup() {
+ cleanup_ns
+}
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
index 75ada17ac061..aebaab8ce44c 100755
--- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -193,6 +193,9 @@
# +---------------------------------------------------+
#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
readonly LOCALSID_TABLE_ID=90
readonly IPv6_RT_NETWORK=fd00
readonly IPv6_HS_NETWORK=cafe
@@ -543,18 +546,18 @@ host_vpn_isolation_tests()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
echo "SKIP: vrf sysctl does not exist"
- exit 0
+ exit $ksft_skip
fi
cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
index ad7a9fc59934..1003119773e5 100755
--- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -163,6 +163,9 @@
# +---------------------------------------------------+
#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
readonly LOCALSID_TABLE_ID=90
readonly IPv6_RT_NETWORK=fd00
readonly IPv4_HS_NETWORK=10.0.0
@@ -464,18 +467,18 @@ host_vpn_isolation_tests()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
echo "SKIP: vrf sysctl does not exist"
- exit 0
+ exit $ksft_skip
fi
cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
index 68708f5e26a0..b9b06ef80d88 100755
--- a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
@@ -164,6 +164,9 @@
# +---------------------------------------------------+
#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
readonly LOCALSID_TABLE_ID=90
readonly IPv6_RT_NETWORK=fd00
readonly IPv6_HS_NETWORK=cafe
@@ -472,18 +475,18 @@ host_vpn_isolation_tests()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
echo "SKIP: vrf sysctl does not exist"
- exit 0
+ exit $ksft_skip
fi
cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
new file mode 100644
index 000000000000..710ac956bdb3
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz.c
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Toeplitz test
+ *
+ * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
+ * 2. Compute the rx_hash in software based on the packet contents
+ * 3. Compare the two
+ *
+ * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
+ *
+ * If '-C $rx_irq_cpu_list' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the rxqueue that RSS would select based on this rx_hash
+ * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
+ * 7. Compare the cpus from 4 and 6
+ *
+ * Else if '-r $rps_bitmap' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
+ * 6. Compare the cpus from 4 and 5
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define TOEPLITZ_KEY_MIN_LEN 40
+#define TOEPLITZ_KEY_MAX_LEN 60
+
+#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
+#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
+#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
+
+#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
+
+#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */
+
+#define RPS_MAX_CPUS 16UL /* must be a power of 2 */
+
+/* configuration options (cmdline arguments) */
+static uint16_t cfg_dport = 8000;
+static int cfg_family = AF_INET6;
+static char *cfg_ifname = "eth0";
+static int cfg_num_queues;
+static int cfg_num_rps_cpus;
+static bool cfg_sink;
+static int cfg_type = SOCK_STREAM;
+static int cfg_timeout_msec = 1000;
+static bool cfg_verbose;
+
+/* global vars */
+static int num_cpus;
+static int ring_block_nr;
+static int ring_block_sz;
+
+/* stats */
+static int frames_received;
+static int frames_nohash;
+static int frames_error;
+
+#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0)
+
+/* tpacket ring */
+struct ring_state {
+ int fd;
+ char *mmap;
+ int idx;
+ int cpu;
+};
+
+static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */
+static int rps_silo_to_cpu[RPS_MAX_CPUS];
+static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static struct ring_state rings[RSS_MAX_CPUS];
+
+static inline uint32_t toeplitz(const unsigned char *four_tuple,
+ const unsigned char *key)
+{
+ int i, bit, ret = 0;
+ uint32_t key32;
+
+ key32 = ntohl(*((uint32_t *)key));
+ key += 4;
+
+ for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
+ for (bit = 7; bit >= 0; bit--) {
+ if (four_tuple[i] & (1 << bit))
+ ret ^= key32;
+
+ key32 <<= 1;
+ key32 |= !!(key[0] & (1 << bit));
+ }
+ key++;
+ }
+
+ return ret;
+}
+
+/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
+static void verify_rss(uint32_t rx_hash, int cpu)
+{
+ int queue = rx_hash % cfg_num_queues;
+
+ log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
+ if (rx_irq_cpus[queue] != cpu) {
+ log_verbose(". error: rss cpu mismatch (%d)", cpu);
+ frames_error++;
+ }
+}
+
+static void verify_rps(uint64_t rx_hash, int cpu)
+{
+ int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
+
+ log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
+ if (rps_silo_to_cpu[silo] != cpu) {
+ log_verbose(". error: rps cpu mismatch (%d)", cpu);
+ frames_error++;
+ }
+}
+
+static void log_rxhash(int cpu, uint32_t rx_hash,
+ const char *addrs, int addr_len)
+{
+ char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
+ uint16_t *ports;
+
+ if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
+ !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
+ error(1, 0, "address parse error");
+
+ ports = (void *)addrs + (addr_len * 2);
+ log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
+ cpu, rx_hash, saddr, daddr,
+ ntohs(ports[0]), ntohs(ports[1]));
+}
+
+/* Compare computed rxhash with rxhash received from tpacket_v3 */
+static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
+{
+ unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
+ uint32_t rx_hash_sw;
+ const char *addrs;
+ int addr_len;
+
+ if (cfg_family == AF_INET) {
+ addr_len = sizeof(struct in_addr);
+ addrs = pkt + offsetof(struct iphdr, saddr);
+ } else {
+ addr_len = sizeof(struct in6_addr);
+ addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
+ }
+
+ memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
+ rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
+
+ if (cfg_verbose)
+ log_rxhash(cpu, rx_hash, addrs, addr_len);
+
+ if (rx_hash != rx_hash_sw) {
+ log_verbose(" != expected 0x%x\n", rx_hash_sw);
+ frames_error++;
+ return;
+ }
+
+ log_verbose(" OK");
+ if (cfg_num_queues)
+ verify_rss(rx_hash, cpu);
+ else if (cfg_num_rps_cpus)
+ verify_rps(rx_hash, cpu);
+ log_verbose("\n");
+}
+
+static char *recv_frame(const struct ring_state *ring, char *frame)
+{
+ struct tpacket3_hdr *hdr = (void *)frame;
+
+ if (hdr->hv1.tp_rxhash)
+ verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
+ ring->cpu);
+ else
+ frames_nohash++;
+
+ return frame + hdr->tp_next_offset;
+}
+
+/* A single TPACKET_V3 block can hold multiple frames */
+static void recv_block(struct ring_state *ring)
+{
+ struct tpacket_block_desc *block;
+ char *frame;
+ int i;
+
+ block = (void *)(ring->mmap + ring->idx * ring_block_sz);
+ if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
+ return;
+
+ frame = (char *)block;
+ frame += block->hdr.bh1.offset_to_first_pkt;
+
+ for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
+ frame = recv_frame(ring, frame);
+ frames_received++;
+ }
+
+ block->hdr.bh1.block_status = TP_STATUS_KERNEL;
+ ring->idx = (ring->idx + 1) % ring_block_nr;
+}
+
+/* simple test: sleep once unconditionally and then process all rings */
+static void process_rings(void)
+{
+ int i;
+
+ usleep(1000 * cfg_timeout_msec);
+
+ for (i = 0; i < num_cpus; i++)
+ recv_block(&rings[i]);
+
+ fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
+ frames_received - frames_nohash - frames_error,
+ frames_nohash, frames_error);
+}
+
+static char *setup_ring(int fd)
+{
+ struct tpacket_req3 req3 = {0};
+ void *ring;
+
+ req3.tp_retire_blk_tov = cfg_timeout_msec;
+ req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+ req3.tp_frame_size = 2048;
+ req3.tp_frame_nr = 1 << 10;
+ req3.tp_block_nr = 2;
+
+ req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
+ req3.tp_block_size /= req3.tp_block_nr;
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
+ error(1, errno, "setsockopt PACKET_RX_RING");
+
+ ring_block_sz = req3.tp_block_size;
+ ring_block_nr = req3.tp_block_nr;
+
+ ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
+ if (ring == MAP_FAILED)
+ error(1, 0, "mmap failed");
+
+ return ring;
+}
+
+static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = sizeof(filter) / sizeof(struct sock_filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+/* filter on transport protocol and destination port */
+static void set_filter(int fd)
+{
+ const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */
+ uint8_t proto;
+
+ proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+ if (cfg_family == AF_INET)
+ __set_filter(fd, offsetof(struct iphdr, protocol), proto,
+ sizeof(struct iphdr) + off_dport);
+ else
+ __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
+ sizeof(struct ip6_hdr) + off_dport);
+}
+
+/* drop everything: used temporarily during setup */
+static void set_filter_null(int fd)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = sizeof(filter) / sizeof(struct sock_filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+static int create_ring(char **ring)
+{
+ struct fanout_args args = {
+ .id = 1,
+ .type_flags = PACKET_FANOUT_CPU,
+ .max_num_members = RSS_MAX_CPUS
+ };
+ struct sockaddr_ll ll = { 0 };
+ int fd, val;
+
+ fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket creation failed");
+
+ val = TPACKET_V3;
+ if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
+ error(1, errno, "setsockopt PACKET_VERSION");
+ *ring = setup_ring(fd);
+
+ /* block packets until all rings are added to the fanout group:
+ * else packets can arrive during setup and get misclassified
+ */
+ set_filter_null(fd);
+
+ ll.sll_family = AF_PACKET;
+ ll.sll_ifindex = if_nametoindex(cfg_ifname);
+ ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
+ htons(ETH_P_IPV6);
+ if (bind(fd, (void *)&ll, sizeof(ll)))
+ error(1, errno, "bind");
+
+ /* must come after bind: verifies all programs in group match */
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
+ /* on failure, retry using old API if that is sufficient:
+ * it has a hard limit of 256 sockets, so only try if
+ * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
+ * in this API, the third argument is left implicit.
+ */
+ if (cfg_num_queues || num_cpus > 256 ||
+ setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+ &args, sizeof(uint32_t)))
+ error(1, errno, "setsockopt PACKET_FANOUT cpu");
+ }
+
+ return fd;
+}
+
+/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
+static int setup_sink(void)
+{
+ int fd, val;
+
+ fd = socket(cfg_family, cfg_type, 0);
+ if (fd == -1)
+ error(1, errno, "socket %d.%d", cfg_family, cfg_type);
+
+ val = 1 << 20;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
+ error(1, errno, "setsockopt rcvbuf");
+
+ return fd;
+}
+
+static void setup_rings(void)
+{
+ int i;
+
+ for (i = 0; i < num_cpus; i++) {
+ rings[i].cpu = i;
+ rings[i].fd = create_ring(&rings[i].mmap);
+ }
+
+ /* accept packets once all rings in the fanout group are up */
+ for (i = 0; i < num_cpus; i++)
+ set_filter(rings[i].fd);
+}
+
+static void cleanup_rings(void)
+{
+ int i;
+
+ for (i = 0; i < num_cpus; i++) {
+ if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
+ error(1, errno, "munmap");
+ if (close(rings[i].fd))
+ error(1, errno, "close");
+ }
+}
+
+static void parse_cpulist(const char *arg)
+{
+ do {
+ rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
+
+ arg = strchr(arg, ',');
+ if (!arg)
+ break;
+ arg++; // skip ','
+ } while (1);
+}
+
+static void show_cpulist(void)
+{
+ int i;
+
+ for (i = 0; i < cfg_num_queues; i++)
+ fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
+}
+
+static void show_silos(void)
+{
+ int i;
+
+ for (i = 0; i < cfg_num_rps_cpus; i++)
+ fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
+}
+
+static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
+{
+ int i, ret, off;
+
+ if (slen < TOEPLITZ_STR_MIN_LEN ||
+ slen > TOEPLITZ_STR_MAX_LEN + 1)
+ error(1, 0, "invalid toeplitz key");
+
+ for (i = 0, off = 0; off < slen; i++, off += 3) {
+ ret = sscanf(str + off, "%hhx", &key[i]);
+ if (ret != 1)
+ error(1, 0, "key parse error at %d off %d len %d",
+ i, off, slen);
+ }
+}
+
+static void parse_rps_bitmap(const char *arg)
+{
+ unsigned long bitmap;
+ int i;
+
+ bitmap = strtoul(arg, NULL, 0);
+
+ if (bitmap & ~(RPS_MAX_CPUS - 1))
+ error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
+ bitmap, RPS_MAX_CPUS - 1);
+
+ for (i = 0; i < RPS_MAX_CPUS; i++)
+ if (bitmap & 1UL << i)
+ rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ static struct option long_options[] = {
+ {"dport", required_argument, 0, 'd'},
+ {"cpus", required_argument, 0, 'C'},
+ {"key", required_argument, 0, 'k'},
+ {"iface", required_argument, 0, 'i'},
+ {"ipv4", no_argument, 0, '4'},
+ {"ipv6", no_argument, 0, '6'},
+ {"sink", no_argument, 0, 's'},
+ {"tcp", no_argument, 0, 't'},
+ {"timeout", required_argument, 0, 'T'},
+ {"udp", no_argument, 0, 'u'},
+ {"verbose", no_argument, 0, 'v'},
+ {"rps", required_argument, 0, 'r'},
+ {0, 0, 0, 0}
+ };
+ bool have_toeplitz = false;
+ int index, c;
+
+ while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:u:v", long_options, &index)) != -1) {
+ switch (c) {
+ case '4':
+ cfg_family = AF_INET;
+ break;
+ case '6':
+ cfg_family = AF_INET6;
+ break;
+ case 'C':
+ parse_cpulist(optarg);
+ break;
+ case 'd':
+ cfg_dport = strtol(optarg, NULL, 0);
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ case 'k':
+ parse_toeplitz_key(optarg, strlen(optarg),
+ toeplitz_key);
+ have_toeplitz = true;
+ break;
+ case 'r':
+ parse_rps_bitmap(optarg);
+ break;
+ case 's':
+ cfg_sink = true;
+ break;
+ case 't':
+ cfg_type = SOCK_STREAM;
+ break;
+ case 'T':
+ cfg_timeout_msec = strtol(optarg, NULL, 0);
+ break;
+ case 'u':
+ cfg_type = SOCK_DGRAM;
+ break;
+ case 'v':
+ cfg_verbose = true;
+ break;
+
+ default:
+ error(1, 0, "unknown option %c", optopt);
+ break;
+ }
+ }
+
+ if (!have_toeplitz)
+ error(1, 0, "Must supply rss key ('-k')");
+
+ num_cpus = get_nprocs();
+ if (num_cpus > RSS_MAX_CPUS)
+ error(1, 0, "increase RSS_MAX_CPUS");
+
+ if (cfg_num_queues && cfg_num_rps_cpus)
+ error(1, 0,
+ "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
+ if (cfg_verbose) {
+ show_cpulist();
+ show_silos();
+ }
+}
+
+int main(int argc, char **argv)
+{
+ const int min_tests = 10;
+ int fd_sink = -1;
+
+ parse_opts(argc, argv);
+
+ if (cfg_sink)
+ fd_sink = setup_sink();
+
+ setup_rings();
+ process_rings();
+ cleanup_rings();
+
+ if (cfg_sink && close(fd_sink))
+ error(1, errno, "close sink");
+
+ if (frames_received - frames_nohash < min_tests)
+ error(1, 0, "too few frames for verification");
+
+ return frames_error;
+}
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
new file mode 100755
index 000000000000..0a49907cd4fe
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz.sh
@@ -0,0 +1,199 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
+# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
+# ('-rps <rps_map>')
+#
+# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action,
+# which is a driver-specific encoding.
+#
+# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
+# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
+
+source setup_loopback.sh
+readonly SERVER_IP4="192.168.1.200/24"
+readonly SERVER_IP6="fda8::1/64"
+readonly SERVER_MAC="aa:00:00:00:00:02"
+
+readonly CLIENT_IP4="192.168.1.100/24"
+readonly CLIENT_IP6="fda8::2/64"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+PORT=8000
+KEY="$(</proc/sys/net/core/netdev_rss_key)"
+TEST_RSS=false
+RPS_MAP=""
+PROTO_FLAG=""
+IP_FLAG=""
+DEV="eth0"
+
+# Return the number of rxqs among which RSS is configured to spread packets.
+# This is determined by reading the RSS indirection table using ethtool.
+get_rss_cfg_num_rxqs() {
+ echo $(ethtool -x "${DEV}" |
+ egrep [[:space:]]+[0-9]+:[[:space:]]+ |
+ cut -d: -f2- |
+ awk '{$1=$1};1' |
+ tr ' ' '\n' |
+ sort -u |
+ wc -l)
+}
+
+# Return a list of the receive irq handler cpus.
+# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
+# Reads /sys/kernel/irq/ in order, so algorithm depends on
+# irq_{rxq-0} < irq_{rxq-1}, etc.
+get_rx_irq_cpus() {
+ CPUS=""
+ # sort so that irq 2 is read before irq 10
+ SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
+ # Consider only as many queues as RSS actually uses. We assume that
+ # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
+ RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
+ RXQ_COUNT=0
+
+ for i in ${SORTED_IRQS}
+ do
+ [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break
+ # lookup relevant IRQs by action name
+ [[ -e "$i/actions" ]] || continue
+ cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue
+ irqname=$(<"$i/actions")
+
+ # does the IRQ get called
+ irqcount=$(cat "$i/per_cpu_count" | tr -d '0,')
+ [[ -n "${irqcount}" ]] || continue
+
+ # lookup CPU
+ irq=$(basename "$i")
+ cpu=$(cat "/proc/irq/$irq/smp_affinity_list")
+
+ if [[ -z "${CPUS}" ]]; then
+ CPUS="${cpu}"
+ else
+ CPUS="${CPUS},${cpu}"
+ fi
+ RXQ_COUNT=$((RXQ_COUNT+1))
+ done
+
+ echo "${CPUS}"
+}
+
+get_disable_rfs_cmd() {
+ echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
+}
+
+get_set_rps_bitmaps_cmd() {
+ CMD=""
+ for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
+ do
+ CMD="${CMD} echo $1 > ${i};"
+ done
+
+ echo "${CMD}"
+}
+
+get_disable_rps_cmd() {
+ echo "$(get_set_rps_bitmaps_cmd 0)"
+}
+
+die() {
+ echo "$1"
+ exit 1
+}
+
+check_nic_rxhash_enabled() {
+ local -r pattern="receive-hashing:\ on"
+
+ ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
+}
+
+parse_opts() {
+ local prog=$0
+ shift 1
+
+ while [[ "$1" =~ "-" ]]; do
+ if [[ "$1" = "-irq_prefix" ]]; then
+ shift
+ IRQ_PATTERN="^$1-[0-9]*$"
+ elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
+ PROTO_FLAG="$1"
+ elif [[ "$1" = "-4" ]]; then
+ IP_FLAG="$1"
+ SERVER_IP="${SERVER_IP4}"
+ CLIENT_IP="${CLIENT_IP4}"
+ elif [[ "$1" = "-6" ]]; then
+ IP_FLAG="$1"
+ SERVER_IP="${SERVER_IP6}"
+ CLIENT_IP="${CLIENT_IP6}"
+ elif [[ "$1" = "-rss" ]]; then
+ TEST_RSS=true
+ elif [[ "$1" = "-rps" ]]; then
+ shift
+ RPS_MAP="$1"
+ elif [[ "$1" = "-i" ]]; then
+ shift
+ DEV="$1"
+ else
+ die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
+ [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
+ fi
+ shift
+ done
+}
+
+setup() {
+ setup_loopback_environment "${DEV}"
+
+ # Set up server_ns namespace and client_ns namespace
+ setup_macvlan_ns "${DEV}" server_ns server \
+ "${SERVER_MAC}" "${SERVER_IP}"
+ setup_macvlan_ns "${DEV}" client_ns client \
+ "${CLIENT_MAC}" "${CLIENT_IP}"
+}
+
+cleanup() {
+ cleanup_macvlan_ns server_ns server client_ns client
+ cleanup_loopback "${DEV}"
+}
+
+parse_opts $0 $@
+
+setup
+trap cleanup EXIT
+
+check_nic_rxhash_enabled
+
+# Actual test starts here
+if [[ "${TEST_RSS}" = true ]]; then
+ # RPS/RFS must be disabled because they move packets between cpus,
+ # which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
+ eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
+ ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+ -C "$(get_rx_irq_cpus)" -s -v &
+elif [[ ! -z "${RPS_MAP}" ]]; then
+ eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
+ ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+ -r "0x${RPS_MAP}" -s -v &
+else
+ ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
+fi
+
+server_pid=$!
+
+ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
+ "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
+
+client_pid=$!
+
+wait "${server_pid}"
+exit_code=$?
+kill -9 "${client_pid}"
+if [[ "${exit_code}" -eq 0 ]]; then
+ echo "Test Succeeded!"
+fi
+exit "${exit_code}"
diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh
new file mode 100755
index 000000000000..2fef34f4aba1
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz_client.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A simple program for generating traffic for the toeplitz test.
+#
+# This program sends packets periodically for, conservatively, 20 seconds. The
+# intent is for the calling program to kill this program once it is no longer
+# needed, rather than waiting for the 20 second expiration.
+
+send_traffic() {
+ expiration=$((SECONDS+20))
+ while [[ "${SECONDS}" -lt "${expiration}" ]]
+ do
+ if [[ "${PROTO}" == "-u" ]]; then
+ echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
+ else
+ echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
+ fi
+ sleep 0.001
+ done
+}
+
+PROTO=$1
+IPVER=$2
+ADDR=$3
+PORT=$4
+
+send_traffic
diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
index 66354cdd5ce4..2d10ccac898a 100755
--- a/tools/testing/selftests/net/unicast_extensions.sh
+++ b/tools/testing/selftests/net/unicast_extensions.sh
@@ -28,12 +28,15 @@
# These tests provide an easy way to flip the expected result of any
# of these behaviors for testing kernel patches that change them.
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
# nettest can be run from PATH or from same directory as this selftest
if ! which nettest >/dev/null; then
PATH=$PWD:$PATH
if ! which nettest >/dev/null; then
echo "'nettest' command not found; skipping tests"
- exit 0
+ exit $ksft_skip
fi
fi
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 11d7cdb898c0..19eac3e44c06 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -13,7 +13,7 @@ readonly NS_DST=$BASE$DST
readonly BM_NET_V4=192.168.1.
readonly BM_NET_V6=2001:db8::
-readonly NPROCS=`nproc`
+readonly CPUS=`nproc`
ret=0
cleanup() {
@@ -75,6 +75,29 @@ chk_tso_flag() {
__chk_flag "$1" $2 $3 tcp-segmentation-offload
}
+chk_channels() {
+ local msg="$1"
+ local target=$2
+ local rx=$3
+ local tx=$4
+
+ local dev=veth$target
+
+ local cur_rx=`ip netns exec $BASE$target ethtool -l $dev |\
+ grep RX: | tail -n 1 | awk '{print $2}' `
+ local cur_tx=`ip netns exec $BASE$target ethtool -l $dev |\
+ grep TX: | tail -n 1 | awk '{print $2}'`
+ local cur_combined=`ip netns exec $BASE$target ethtool -l $dev |\
+ grep Combined: | tail -n 1 | awk '{print $2}'`
+
+ printf "%-60s" "$msg"
+ if [ "$cur_rx" = "$rx" -a "$cur_tx" = "$tx" -a "$cur_combined" = "n/a" ]; then
+ echo " ok "
+ else
+ echo " fail rx:$rx:$cur_rx tx:$tx:$cur_tx combined:n/a:$cur_combined"
+ fi
+}
+
chk_gro() {
local msg="$1"
local expected=$2
@@ -107,11 +130,100 @@ chk_gro() {
fi
}
+__change_channels()
+{
+ local cur_cpu
+ local end=$1
+ local cur
+ local i
+
+ while true; do
+ printf -v cur '%(%s)T'
+ [ $cur -le $end ] || break
+
+ for i in `seq 1 $CPUS`; do
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx $i tx $i
+ ip netns exec $NS_DST ethtool -L veth$DST rx $i tx $i
+ done
+
+ for i in `seq 1 $((CPUS - 1))`; do
+ cur_cpu=$((CPUS - $i))
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx $cur_cpu tx $cur_cpu
+ ip netns exec $NS_DST ethtool -L veth$DST rx $cur_cpu tx $cur_cpu
+ done
+ done
+}
+
+__send_data() {
+ local end=$1
+
+ while true; do
+ printf -v cur '%(%s)T'
+ [ $cur -le $end ] || break
+
+ ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 1000 -M 300 -D $BM_NET_V4$DST
+ done
+}
+
+do_stress() {
+ local end
+ printf -v end '%(%s)T'
+ end=$((end + $STRESS))
+
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 3 tx 3
+ ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+
+ ip netns exec $NS_DST ./udpgso_bench_rx &
+ local rx_pid=$!
+
+ echo "Running stress test for $STRESS seconds..."
+ __change_channels $end &
+ local ch_pid=$!
+ __send_data $end &
+ local data_pid_1=$!
+ __send_data $end &
+ local data_pid_2=$!
+ __send_data $end &
+ local data_pid_3=$!
+ __send_data $end &
+ local data_pid_4=$!
+
+ wait $ch_pid $data_pid_1 $data_pid_2 $data_pid_3 $data_pid_4
+ kill -9 $rx_pid
+ echo "done"
+
+ # restore previous setting
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 tx 2
+ ip netns exec $NS_DST ethtool -L veth$DST rx 2 tx 1
+}
+
+usage() {
+ echo "Usage: $0 [-h] [-s <seconds>]"
+ echo -e "\t-h: show this help"
+ echo -e "\t-s: run optional stress tests for the given amount of seconds"
+}
+
+STRESS=0
+while getopts "hs:" option; do
+ case "$option" in
+ "h")
+ usage $0
+ exit 0
+ ;;
+ "s")
+ STRESS=$OPTARG
+ ;;
+ esac
+done
+
if [ ! -f ../bpf/xdp_dummy.o ]; then
echo "Missing xdp_dummy helper. Build bpf selftest first"
exit 1
fi
+[ $CPUS -lt 2 ] && echo "Only one CPU available, some tests will be skipped"
+[ $STRESS -gt 0 -a $CPUS -lt 3 ] && echo " stress test will be skipped, too"
+
create_ns
chk_gro_flag "default - gro flag" $SRC off
chk_gro_flag " - peer gro flag" $DST off
@@ -134,6 +246,8 @@ chk_gro " - aggregation with TSO off" 1
cleanup
create_ns
+chk_channels "default channels" $DST 1 1
+
ip -n $NS_DST link set dev veth$DST down
ip netns exec $NS_DST ethtool -K veth$DST gro on
chk_gro_flag "with gro enabled on link down - gro flag" $DST on
@@ -147,6 +261,56 @@ chk_gro " - aggregation with TSO off" 1
cleanup
create_ns
+
+CUR_TX=1
+CUR_RX=1
+if [ $CPUS -gt 1 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST tx 2
+ chk_channels "setting tx channels" $DST 1 2
+ CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+ chk_channels "setting both rx and tx channels" $DST 3 3
+ CUR_RX=3
+ CUR_TX=3
+fi
+
+ip netns exec $NS_DST ethtool -L veth$DST combined 2 2>/dev/null
+chk_channels "bad setting: combined channels" $DST $CUR_RX $CUR_TX
+
+ip netns exec $NS_DST ethtool -L veth$DST tx $((CPUS + 1)) 2>/dev/null
+chk_channels "setting invalid channels nr" $DST $CUR_RX $CUR_TX
+
+if [ $CPUS -gt 1 ]; then
+ # this also tests queues nr reduction
+ ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null
+ printf "%-60s" "bad setting: XDP with RX nr less than TX"
+ ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+ section xdp_dummy 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+
+ # the following tests will run with multiple channels active
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 2
+ ip netns exec $NS_DST ethtool -L veth$DST rx 2
+ ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+ section xdp_dummy 2>/dev/null
+ printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set"
+ ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+ CUR_RX=2
+ CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+ printf "%-60s" "bad setting: increasing peer TX nr above RX with XDP set"
+ ip netns exec $NS_SRC ethtool -L veth$SRC tx 3 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+ chk_channels "setting invalid channels nr" $DST 2 2
+fi
+
ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
chk_gro_flag "with xdp attached - gro flag" $DST on
chk_gro_flag " - peer gro flag" $SRC off
@@ -167,10 +331,27 @@ chk_gro_flag " - after gro on xdp off, gro flag" $DST on
chk_gro_flag " - peer gro flag" $SRC off
chk_tso_flag " - tso flag" $SRC on
chk_tso_flag " - peer tso flag" $DST on
+
+if [ $CPUS -gt 1 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST tx 1
+ chk_channels "decreasing tx channels with device down" $DST 2 1
+fi
+
ip -n $NS_DST link set dev veth$DST up
ip -n $NS_SRC link set dev veth$SRC up
chk_gro " - aggregation" 1
+if [ $CPUS -gt 1 ]; then
+ [ $STRESS -gt 0 -a $CPUS -gt 2 ] && do_stress
+
+ ip -n $NS_DST link set dev veth$DST down
+ ip -n $NS_SRC link set dev veth$SRC down
+ ip netns exec $NS_DST ethtool -L veth$DST tx 2
+ chk_channels "increasing tx channels with device down" $DST 2 2
+ ip -n $NS_DST link set dev veth$DST up
+ ip -n $NS_SRC link set dev veth$SRC up
+fi
+
ip netns exec $NS_DST ethtool -K veth$DST gro off
ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
chk_gro "aggregation again with default and TSO off" 10
diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
index 18b982d611de..865d53c1781c 100755
--- a/tools/testing/selftests/net/vrf_strict_mode_test.sh
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -3,6 +3,9 @@
# This test is designed for testing the new VRF strict_mode functionality.
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
ret=0
# identifies the "init" network namespace which is often called root network
@@ -371,18 +374,18 @@ vrf_strict_mode_check_support()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
echo "SKIP: vrf sysctl does not exist"
- exit 0
+ exit $ksft_skip
fi
cleanup &> /dev/null
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index d7ec1e7da0d0..1bddbe934204 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -22,7 +22,11 @@
* XXX: This is wrong on {mips, parisc, powerpc, sparc}.
*/
#undef O_LARGEFILE
+#ifdef __aarch64__
+#define O_LARGEFILE 0x20000
+#else
#define O_LARGEFILE 0x8000
+#endif
struct open_how_ext {
struct open_how inner;
diff --git a/tools/testing/selftests/powerpc/primitives/asm/extable.h b/tools/testing/selftests/powerpc/primitives/asm/extable.h
new file mode 120000
index 000000000000..6385f059a951
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/extable.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/extable.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
index 82f7bdc2e5e6..67ca297c5cca 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
@@ -57,7 +57,7 @@ trans:
: [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2),
[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
[flt_2] "b" (&b), [cptr1] "b" (&cptr[1])
- : "memory", "r7", "r8", "r9", "r10",
+ : "memory", "r0", "r7", "r8", "r9", "r10",
"r11", "r12", "r13", "r14", "r15", "r16",
"r17", "r18", "r19", "r20", "r21", "r22",
"r23", "r24", "r25", "r26", "r27", "r28",
@@ -113,6 +113,7 @@ int ptrace_tm_gpr(void)
int ret, status;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
pid = fork();
if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
index ad65be6e8e85..6f2bce1b6c5d 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
@@ -65,7 +65,7 @@ trans:
: [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4),
[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
[flt_4] "b" (&d)
- : "memory", "r5", "r6", "r7",
+ : "memory", "r0", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
@@ -119,6 +119,7 @@ int ptrace_tm_spd_gpr(void)
int ret, status;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
pid = fork();
if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
index 2ecfa1158e2b..e112a34fbe59 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
@@ -129,6 +129,7 @@ int ptrace_tm_spd_tar(void)
int ret, status;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
pid = fork();
if (pid == 0)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
index 6f7fb51f0809..40133d49fe39 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
@@ -129,6 +129,7 @@ int ptrace_tm_spd_vsx(void)
int ret, status, i;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
for (i = 0; i < 128; i++) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
index 068bfed2e606..880ba6a29a48 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
@@ -114,6 +114,7 @@ int ptrace_tm_spr(void)
int ret, status;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
shm_id = shmget(IPC_PRIVATE, sizeof(struct shared), 0777|IPC_CREAT);
shm_id1 = shmget(IPC_PRIVATE, sizeof(int), 0777|IPC_CREAT);
pid = fork();
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
index 46ef378a15ec..d0db6df0f0ea 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
@@ -117,6 +117,7 @@ int ptrace_tm_tar(void)
int ret, status;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
pid = fork();
if (pid == 0)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
index 70ca01234f79..4f05ce4fd282 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
@@ -113,6 +113,7 @@ int ptrace_tm_vsx(void)
int ret, status, i;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
for (i = 0; i < 128; i++) {
diff --git a/tools/testing/selftests/powerpc/signal/signal_tm.c b/tools/testing/selftests/powerpc/signal/signal_tm.c
index 5bf2224ef7f2..c9cf66a3daa2 100644
--- a/tools/testing/selftests/powerpc/signal/signal_tm.c
+++ b/tools/testing/selftests/powerpc/signal/signal_tm.c
@@ -56,6 +56,7 @@ static int test_signal_tm()
}
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
for (i = 0; i < MAX_ATTEMPT; i++) {
/*
diff --git a/tools/testing/selftests/powerpc/tm/tm-exec.c b/tools/testing/selftests/powerpc/tm/tm-exec.c
index 260cfdb97d23..c59919d6710d 100644
--- a/tools/testing/selftests/powerpc/tm/tm-exec.c
+++ b/tools/testing/selftests/powerpc/tm/tm-exec.c
@@ -27,6 +27,7 @@ static char *path;
static int test_exec(void)
{
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
asm __volatile__(
"tbegin.;"
diff --git a/tools/testing/selftests/powerpc/tm/tm-fork.c b/tools/testing/selftests/powerpc/tm/tm-fork.c
index 6efa5a685a77..c27b935f0e9f 100644
--- a/tools/testing/selftests/powerpc/tm/tm-fork.c
+++ b/tools/testing/selftests/powerpc/tm/tm-fork.c
@@ -21,6 +21,7 @@
int test_fork(void)
{
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
asm __volatile__(
"tbegin.;"
diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c
index 29e5f26af7b9..a7bbf034b5bb 100644
--- a/tools/testing/selftests/powerpc/tm/tm-poison.c
+++ b/tools/testing/selftests/powerpc/tm/tm-poison.c
@@ -20,7 +20,6 @@
#include <sched.h>
#include <sys/types.h>
#include <signal.h>
-#include <inttypes.h>
#include "tm.h"
@@ -34,6 +33,7 @@ int tm_poison_test(void)
bool fail_vr = false;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
cpu = pick_online_cpu();
FAIL_IF(cpu < 0);
diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
index 4cdb83964bb3..85c940ae6ff8 100644
--- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
@@ -40,6 +40,7 @@ int test_body(void)
uint64_t rv, dscr1 = 1, dscr2, texasr;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
printf("Check DSCR TM context switch: ");
fflush(stdout);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
index 254f912ad611..657d755b2905 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
@@ -79,6 +79,7 @@ static int tm_signal_context_chk_fpu()
pid_t pid = getpid();
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
act.sa_sigaction = signal_usr1;
sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
index 0cc680f61828..400fa70ca71e 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
@@ -81,6 +81,7 @@ static int tm_signal_context_chk_gpr()
pid_t pid = getpid();
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
act.sa_sigaction = signal_usr1;
sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
index b6d52730a0d8..d628fd302b28 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
@@ -104,6 +104,7 @@ static int tm_signal_context_chk()
pid_t pid = getpid();
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
act.sa_sigaction = signal_usr1;
sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
index 8e25e2072ecd..9bd869245bad 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
@@ -153,6 +153,7 @@ static int tm_signal_context_chk()
pid_t pid = getpid();
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
act.sa_sigaction = signal_usr1;
sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
index 5908bc6abe60..0b84c9208d62 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
@@ -226,6 +226,7 @@ int tm_signal_pagefault(void)
stack_t ss;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
SKIP_IF(!have_userfaultfd());
setup_uf_mem();
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
index 07c388147b75..06b801906f27 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
@@ -32,6 +32,7 @@ int tm_signal_sigreturn_nt(void)
struct sigaction trap_sa;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
trap_sa.sa_flags = SA_SIGINFO;
trap_sa.sa_sigaction = trap_signal_handler;
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
index cdcf8c5bbbc7..68807aac8dd3 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
@@ -35,6 +35,7 @@ int tm_signal_stack()
int pid;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
pid = fork();
if (pid < 0)
diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
index 9a6017a1d769..ffe4e5515f33 100644
--- a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
+++ b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
@@ -55,6 +55,7 @@ int tm_sigreturn(void)
uint64_t ret = 0;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
SKIP_IF(!is_ppc64le());
memset(&sa, 0, sizeof(sa));
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c
index becb8207b432..467a6b3134b2 100644
--- a/tools/testing/selftests/powerpc/tm/tm-syscall.c
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -25,7 +25,6 @@ extern int getppid_tm_suspended(void);
unsigned retries = 0;
#define TEST_DURATION 10 /* seconds */
-#define TM_RETRIES 100
pid_t getppid_tm(bool suspend)
{
@@ -67,6 +66,7 @@ int tm_syscall(void)
struct timeval end, now;
SKIP_IF(!have_htm_nosc());
+ SKIP_IF(htm_is_synthetic());
setbuf(stdout, NULL);
diff --git a/tools/testing/selftests/powerpc/tm/tm-tar.c b/tools/testing/selftests/powerpc/tm/tm-tar.c
index 03be8c47292b..f2a9137f3c1e 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tar.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tar.c
@@ -26,6 +26,7 @@ int test_tar(void)
int i;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
SKIP_IF(!is_ppc64le());
for (i = 0; i < num_loops; i++)
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
index 794d574db784..dd5ddffa28b7 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -96,6 +96,7 @@ int test_tmspr()
unsigned long i;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
/* To cause some context switching */
thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN);
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
index 11521077f915..97cb74768e30 100644
--- a/tools/testing/selftests/powerpc/tm/tm-trap.c
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -255,6 +255,7 @@ int tm_trap_test(void)
struct sigaction trap_sa;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
trap_sa.sa_flags = SA_SIGINFO;
trap_sa.sa_sigaction = trap_signal_handler;
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index a1348a5f721a..6bf1b65b020d 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -344,6 +344,7 @@ int tm_unavailable_test(void)
cpu_set_t cpuset;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
cpu = pick_online_cpu();
FAIL_IF(cpu < 0);
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
index 9ef37a9836ac..34364ed2b6b7 100644
--- a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
+++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
@@ -91,6 +91,7 @@ int tm_vmx_unavail_test()
pthread_t *thread;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
passed = 1;
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
index c1e788a6df47..1640e7ead69b 100644
--- a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
+++ b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
@@ -46,6 +46,7 @@ int test_vmxcopy()
uint64_t aborted = 0;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
SKIP_IF(!is_ppc64le());
fd = mkstemp(tmpfile);
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
index c5a1e5c163fc..c03c6e778876 100644
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -10,6 +10,9 @@
#include <asm/tm.h>
#include "utils.h"
+#include "reg.h"
+
+#define TM_RETRIES 100
static inline bool have_htm(void)
{
@@ -31,6 +34,39 @@ static inline bool have_htm_nosc(void)
#endif
}
+/*
+ * Transactional Memory was removed in ISA 3.1. A synthetic TM implementation
+ * is provided on P10 for threads running in P8/P9 compatibility mode. The
+ * synthetic implementation immediately fails after tbegin. This failure sets
+ * Bit 7 (Failure Persistent) and Bit 15 (Implementation-specific).
+ */
+static inline bool htm_is_synthetic(void)
+{
+ int i;
+
+ /*
+ * Per the ISA, the Failure Persistent bit may be incorrect. Try a few
+ * times in case we got an Implementation-specific failure on a non ISA
+ * v3.1 system. On these systems the Implementation-specific failure
+ * should not be persistent.
+ */
+ for (i = 0; i < TM_RETRIES; i++) {
+ asm volatile(
+ "tbegin.;"
+ "beq 1f;"
+ "tend.;"
+ "1:"
+ :
+ :
+ : "memory");
+
+ if ((__builtin_get_texasr() & (TEXASR_FP | TEXASR_IC)) !=
+ (TEXASR_FP | TEXASR_IC))
+ break;
+ }
+ return i == TM_RETRIES;
+}
+
static inline long failure_code(void)
{
return __builtin_get_texasru() >> 24;
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
index 15d937ba96ca..fd1ffaa5a135 100755
--- a/tools/testing/selftests/rcutorture/bin/jitter.sh
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -68,16 +68,12 @@ do
cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
srand(n + me + systime());
ncpus = split(cpus, ca);
- curcpu = ca[int(rand() * ncpus + 1)];
- z = "";
- for (i = 1; 4 * i <= curcpu; i++)
- z = z "0";
- print "0x" 2 ^ (curcpu % 4) z;
+ print ca[int(rand() * ncpus + 1)];
}' < /dev/null`
n=$(($n+1))
- if ! taskset -p $cpumask $$ > /dev/null 2>&1
+ if ! taskset -c -p $cpumask $$ > /dev/null 2>&1
then
- echo taskset failure: '"taskset -p ' $cpumask $$ '"'
+ echo taskset failure: '"taskset -c -p ' $cpumask $$ '"'
exit 1
fi
diff --git a/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
index e5cc6b2f195e..1af5d6b86b39 100755
--- a/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
+++ b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
@@ -14,7 +14,7 @@ if test -z "$TORTURE_KCONFIG_KCSAN_ARG"
then
exit 0
fi
-cat $1/*/console.log |
+find $1 -name console.log -exec cat {} \; |
grep "BUG: KCSAN: " |
sed -e 's/^\[[^]]*] //' |
sort |
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
index d8c8483c46f1..5a0023d183da 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -142,7 +142,7 @@ then
echo "Cannot copy from $oldrun to $rundir."
usage
fi
-rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
+rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
touch "$rundir/log"
echo $scriptname $args | tee -a "$rundir/log"
echo $oldrun > "$rundir/re-run"
@@ -179,6 +179,6 @@ if test -n "$dryrun"
then
echo ---- Dryrun complete, directory: $rundir | tee -a "$rundir/log"
else
- ( cd "$rundir"; sh $T/runbatches.sh )
+ ( cd "$rundir"; sh $T/runbatches.sh ) | tee -a "$rundir/log"
kvm-end-run-stats.sh "$rundir" "$starttime"
fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh
new file mode 100755
index 000000000000..f99b2c146f83
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh
@@ -0,0 +1,106 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Produce awk statements roughly depicting the system's CPU and cache
+# layout. If the required information is not available, produce
+# error messages as awk comments. Successful exit regardless.
+#
+# Usage: kvm-assign-cpus.sh /path/to/sysfs
+
+T=/tmp/kvm-assign-cpus.sh.$$
+trap 'rm -rf $T' 0 2
+mkdir $T
+
+sysfsdir=${1-/sys/devices/system/node}
+if ! cd "$sysfsdir" > $T/msg 2>&1
+then
+ sed -e 's/^/# /' < $T/msg
+ exit 0
+fi
+nodelist="`ls -d node*`"
+for i in node*
+do
+ if ! test -d $i/
+ then
+ echo "# Not a directory: $sysfsdir/node*"
+ exit 0
+ fi
+ for j in $i/cpu*/cache/index*
+ do
+ if ! test -d $j/
+ then
+ echo "# Not a directory: $sysfsdir/$j"
+ exit 0
+ else
+ break
+ fi
+ done
+ indexlist="`ls -d $i/cpu* | grep 'cpu[0-9][0-9]*' | head -1 | sed -e 's,^.*$,ls -d &/cache/index*,' | sh | sed -e 's,^.*/,,'`"
+ break
+done
+for i in node*/cpu*/cache/index*/shared_cpu_list
+do
+ if ! test -f $i
+ then
+ echo "# Not a file: $sysfsdir/$i"
+ exit 0
+ else
+ break
+ fi
+done
+firstshared=
+for i in $indexlist
+do
+ rm -f $T/cpulist
+ for n in node*
+ do
+ f="$n/cpu*/cache/$i/shared_cpu_list"
+ if ! cat $f > $T/msg 2>&1
+ then
+ sed -e 's/^/# /' < $T/msg
+ exit 0
+ fi
+ cat $f >> $T/cpulist
+ done
+ if grep -q '[-,]' $T/cpulist
+ then
+ if test -z "$firstshared"
+ then
+ firstshared="$i"
+ fi
+ fi
+done
+if test -z "$firstshared"
+then
+ splitindex="`echo $indexlist | sed -e 's/ .*$//'`"
+else
+ splitindex="$firstshared"
+fi
+nodenum=0
+for n in node*
+do
+ cat $n/cpu*/cache/$splitindex/shared_cpu_list | sort -u -k1n |
+ awk -v nodenum="$nodenum" '
+ BEGIN {
+ idx = 0;
+ }
+
+ {
+ nlists = split($0, cpulists, ",");
+ for (i = 1; i <= nlists; i++) {
+ listsize = split(cpulists[i], cpus, "-");
+ if (listsize == 1)
+ cpus[2] = cpus[1];
+ for (j = cpus[1]; j <= cpus[2]; j++) {
+ print "cpu[" nodenum "][" idx "] = " j ";";
+ idx++;
+ }
+ }
+ }
+
+ END {
+ print "nodecpus[" nodenum "] = " idx ";";
+ }'
+ nodenum=`expr $nodenum + 1`
+done
+echo "numnodes = $nodenum;"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh b/tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh
new file mode 100755
index 000000000000..20c7c53c5795
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh
@@ -0,0 +1,88 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Create an awk script that takes as input numbers of CPUs and outputs
+# lists of CPUs, one per line in both cases.
+#
+# Usage: kvm-get-cpus-script.sh /path/to/cpu/arrays /path/to/put/script [ /path/to/state ]
+#
+# The CPU arrays are output by kvm-assign-cpus.sh, and are valid awk
+# statements initializing the variables describing the system's topology.
+#
+# The optional state is input by this script (if the file exists and is
+# non-empty), and can also be output by this script.
+
+cpuarrays="${1-/sys/devices/system/node}"
+scriptfile="${2}"
+statefile="${3}"
+
+if ! test -f "$cpuarrays"
+then
+ echo "File not found: $cpuarrays" 1>&2
+ exit 1
+fi
+scriptdir="`dirname "$scriptfile"`"
+if ! test -d "$scriptdir" || ! test -x "$scriptdir" || ! test -w "$scriptdir"
+then
+ echo "Directory not usable for script output: $scriptdir"
+ exit 1
+fi
+
+cat << '___EOF___' > "$scriptfile"
+BEGIN {
+___EOF___
+cat "$cpuarrays" >> "$scriptfile"
+if test -r "$statefile"
+then
+ cat "$statefile" >> "$scriptfile"
+fi
+cat << '___EOF___' >> "$scriptfile"
+}
+
+# Do we have the system architecture to guide CPU affinity?
+function gotcpus()
+{
+ return numnodes != "";
+}
+
+# Return a comma-separated list of the next n CPUs.
+function nextcpus(n, i, s)
+{
+ for (i = 0; i < n; i++) {
+ if (nodecpus[curnode] == "")
+ curnode = 0;
+ if (cpu[curnode][curcpu[curnode]] == "")
+ curcpu[curnode] = 0;
+ if (s != "")
+ s = s ",";
+ s = s cpu[curnode][curcpu[curnode]];
+ curcpu[curnode]++;
+ curnode++
+ }
+ return s;
+}
+
+# Dump out the current node/CPU state so that a later invocation of this
+# script can continue where this one left off. Of course, this only works
+# when a state file was specified and where there was valid sysfs state.
+# Returns 1 if the state was dumped, 0 otherwise.
+#
+# Dumping the state for one system configuration and loading it into
+# another isn't likely to do what you want, whatever that might be.
+function dumpcpustate( i, fn)
+{
+___EOF___
+echo ' fn = "'"$statefile"'";' >> $scriptfile
+cat << '___EOF___' >> "$scriptfile"
+ if (fn != "" && gotcpus()) {
+ print "curnode = " curnode ";" > fn;
+ for (i = 0; i < numnodes; i++)
+ if (curcpu[i] != "")
+ print "curcpu[" i "] = " curcpu[i] ";" >> fn;
+ return 1;
+ }
+ if (fn != "")
+ print "# No CPU state to dump." > fn;
+ return 0;
+}
+___EOF___
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
index f3a7a5e2b89d..db2c0e2c8e1d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
@@ -25,7 +25,7 @@ then
echo "$configfile -------"
else
title="$configfile ------- $ncs acquisitions/releases"
- dur=`sed -e 's/^.* locktorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
+ dur=`grep -v '^#' $i/qemu-cmd | sed -e 's/^.* locktorture.shutdown_secs=//' -e 's/ .*$//' 2> /dev/null`
if test -z "$dur"
then
:
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
index 671bfee4fcef..3afa5c6eda4f 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
@@ -25,7 +25,7 @@ if test -z "$nscfs"
then
echo "$configfile ------- "
else
- dur="`sed -e 's/^.* scftorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`"
+ dur="`grep -v '^#' $i/qemu-cmd | sed -e 's/^.* scftorture.shutdown_secs=//' -e 's/ .*$//' 2> /dev/null`"
if test -z "$dur"
then
rate=""
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index e01b31b87044..0a5419982ab3 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -74,7 +74,10 @@ do
done
if test -f "$rd/kcsan.sum"
then
- if grep -q CONFIG_KCSAN=y $T
+ if ! test -f $T
+ then
+ :
+ elif grep -q CONFIG_KCSAN=y $T
then
echo "Compiler or architecture does not support KCSAN!"
echo Did you forget to switch your compiler with '--kmake-arg CC=<cc-that-supports-kcsan>'?
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh
new file mode 100755
index 000000000000..014ce68260d7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Periodically scan a directory tree to prevent files from being reaped
+# by systemd and friends on long runs.
+#
+# Usage: kvm-remote-noreap.sh pathname
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+pathname="$1"
+if test "$pathname" = ""
+then
+ echo Usage: kvm-remote-noreap.sh pathname
+ exit 1
+fi
+if ! test -d "$pathname"
+then
+ echo Usage: kvm-remote-noreap.sh pathname
+ echo " pathname must be a directory."
+ exit 2
+fi
+
+while test -d "$pathname"
+do
+ find "$pathname" -type f -exec touch -c {} \; > /dev/null 2>&1
+ sleep 30
+done
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index 79e680e0e7bf..03126eb6ec5a 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -124,10 +124,12 @@ awk < "$rundir"/scenarios -v dest="$T/bin" -v rundir="$rundir" '
n = $1;
sub(/\./, "", n);
fn = dest "/kvm-remote-" n ".sh"
+ print "kvm-remote-noreap.sh " rundir " &" > fn;
scenarios = "";
for (i = 2; i <= NF; i++)
scenarios = scenarios " " $i;
- print "kvm-test-1-run-batch.sh" scenarios > fn;
+ print "kvm-test-1-run-batch.sh" scenarios >> fn;
+ print "sync" >> fn;
print "rm " rundir "/remote.run" >> fn;
}'
chmod +x $T/bin/kvm-remote-*.sh
@@ -172,11 +174,20 @@ checkremotefile () {
do
ssh $1 "test -f \"$2\""
ret=$?
- if test "$ret" -ne 255
+ if test "$ret" -eq 255
then
+ echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date`
+ elif test "$ret" -eq 0
+ then
+ return 0
+ elif test "$ret" -eq 1
+ then
+ echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\"
+ return 1
+ else
+ echo " ---" Exit code $ret: ssh $1 test -f \"$2\", retry after $sleeptime seconds. `date`
return $ret
fi
- echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date`
sleep $sleeptime
done
}
@@ -242,7 +253,8 @@ do
do
sleep 30
done
- ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu_pid */qemu-retval; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
+ echo " ---" Collecting results from $i `date`
+ ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
done
( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
index 7ea0809e229e..1e29d656501b 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
@@ -50,10 +50,34 @@ grep '^#' $1/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings
echo ---- System running test: `uname -a`
echo ---- Starting kernels. `date` | tee -a log
$TORTURE_JITTER_START
+kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk
for i in "$@"
do
echo ---- System running test: `uname -a` > $i/kvm-test-1-run-qemu.sh.out
echo > $i/kvm-test-1-run-qemu.sh.out
+ export TORTURE_AFFINITY=
+ kvm-get-cpus-script.sh $T/cpuarray.awk $T/cpubatches.awk $T/cpustate
+ cat << ' ___EOF___' >> $T/cpubatches.awk
+ END {
+ affinitylist = "";
+ if (!gotcpus()) {
+ print "echo No CPU-affinity information, so no taskset command.";
+ } else if (cpu_count !~ /^[0-9][0-9]*$/) {
+ print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command.";
+ } else {
+ affinitylist = nextcpus(cpu_count);
+ if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/))
+ print "echo " scenario ": Bogus CPU-affinity information, so no taskset command.";
+ else if (!dumpcpustate())
+ print "echo " scenario ": Could not dump state, so no taskset command.";
+ else
+ print "export TORTURE_AFFINITY=" affinitylist;
+ }
+ }
+ ___EOF___
+ cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`"
+ affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`"
+ $affinity_export
kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 &
done
for i in $runfiles
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
index 5b1aa2a4f3f6..44280582c594 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
@@ -39,27 +39,34 @@ echo ' ---' `date`: Starting kernel, PID $$
grep '^#' $resdir/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings
. $T/qemu-cmd-settings
-# Decorate qemu-cmd with redirection, backgrounding, and PID capture
-sed -e 's/$/ 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd
-echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd
+# Decorate qemu-cmd with affinity, redirection, backgrounding, and PID capture
+taskset_command=
+if test -n "$TORTURE_AFFINITY"
+then
+ taskset_command="taskset -c $TORTURE_AFFINITY "
+fi
+sed -e 's/^[^#].*$/'"$taskset_command"'& 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd
+echo 'qemu_pid=$!' >> $T/qemu-cmd
+echo 'echo $qemu_pid > $resdir/qemu-pid' >> $T/qemu-cmd
+echo 'taskset -c -p $qemu_pid > $resdir/qemu-affinity' >> $T/qemu-cmd
# In case qemu refuses to run...
echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
# Attempt to run qemu
kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
-( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
+( . $T/qemu-cmd; wait `cat $resdir/qemu-pid`; echo $? > $resdir/qemu-retval ) &
commandcompleted=0
if test -z "$TORTURE_KCONFIG_GDB_ARG"
then
sleep 10 # Give qemu's pid a chance to reach the file
- if test -s "$resdir/qemu_pid"
+ if test -s "$resdir/qemu-pid"
then
- qemu_pid=`cat "$resdir/qemu_pid"`
- echo Monitoring qemu job at pid $qemu_pid
+ qemu_pid=`cat "$resdir/qemu-pid"`
+ echo Monitoring qemu job at pid $qemu_pid `date`
else
qemu_pid=""
- echo Monitoring qemu job at yet-as-unknown pid
+ echo Monitoring qemu job at yet-as-unknown pid `date`
fi
fi
if test -n "$TORTURE_KCONFIG_GDB_ARG"
@@ -82,9 +89,9 @@ then
fi
while :
do
- if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+ if test -z "$qemu_pid" && test -s "$resdir/qemu-pid"
then
- qemu_pid=`cat "$resdir/qemu_pid"`
+ qemu_pid=`cat "$resdir/qemu-pid"`
fi
kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
@@ -115,22 +122,22 @@ do
break
fi
done
-if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+if test -z "$qemu_pid" && test -s "$resdir/qemu-pid"
then
- qemu_pid=`cat "$resdir/qemu_pid"`
+ qemu_pid=`cat "$resdir/qemu-pid"`
fi
-if test $commandcompleted -eq 0 -a -n "$qemu_pid"
+if test $commandcompleted -eq 0 && test -n "$qemu_pid"
then
if ! test -f "$resdir/../STOP.1"
then
- echo Grace period for qemu job at pid $qemu_pid
+ echo Grace period for qemu job at pid $qemu_pid `date`
fi
oldline="`tail $resdir/console.log`"
while :
do
if test -f "$resdir/../STOP.1"
then
- echo "PID $qemu_pid killed due to run STOP.1 request" >> $resdir/Warnings 2>&1
+ echo "PID $qemu_pid killed due to run STOP.1 request `date`" >> $resdir/Warnings 2>&1
kill -KILL $qemu_pid
break
fi
@@ -152,13 +159,17 @@ then
then
last_ts=0
fi
- if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ if test "$newline" != "$oldline" && test "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE)) && test "$last_ts" -gt "$TORTURE_SHUTDOWN_GRACE"
then
must_continue=yes
+ if test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ then
+ echo Continuing at console.log time $last_ts \"`tail -n 1 $resdir/console.log`\" `date`
+ fi
fi
- if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ if test $must_continue = no && test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
then
- echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1
+ echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds `date`" >> $resdir/Warnings 2>&1
kill -KILL $qemu_pid
break
fi
@@ -172,5 +183,3 @@ fi
# Tell the script that this run is done.
rm -f $resdir/build.run
-
-parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 420ed5ce9d32..f4c8055dbf7a 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -205,6 +205,7 @@ echo "# TORTURE_KCONFIG_GDB_ARG=\"$TORTURE_KCONFIG_GDB_ARG\"" >> $resdir/qemu-cm
echo "# TORTURE_JITTER_START=\"$TORTURE_JITTER_START\"" >> $resdir/qemu-cmd
echo "# TORTURE_JITTER_STOP=\"$TORTURE_JITTER_STOP\"" >> $resdir/qemu-cmd
echo "# TORTURE_TRUST_MAKE=\"$TORTURE_TRUST_MAKE\"; export TORTURE_TRUST_MAKE" >> $resdir/qemu-cmd
+echo "# TORTURE_CPU_COUNT=$cpu_count" >> $resdir/qemu-cmd
if test -n "$TORTURE_BUILDONLY"
then
@@ -214,3 +215,4 @@ then
fi
kvm-test-1-run-qemu.sh $resdir
+parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index b4ac4ee33222..f442d84fb2a3 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -430,17 +430,10 @@ then
git diff HEAD >> $resdir/$ds/testid.txt
fi
___EOF___
-awk < $T/cfgcpu.pack \
- -v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
- -v CONFIGDIR="$CONFIGFRAG/" \
- -v KVM="$KVM" \
- -v ncpus=$cpus \
- -v jitter="$jitter" \
- -v rd=$resdir/$ds/ \
- -v dur=$dur \
- -v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
- -v TORTURE_BOOTARGS="$TORTURE_BOOTARGS" \
-'BEGIN {
+kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk
+kvm-get-cpus-script.sh $T/cpuarray.awk $T/dumpbatches.awk
+cat << '___EOF___' >> $T/dumpbatches.awk
+BEGIN {
i = 0;
}
@@ -451,7 +444,7 @@ awk < $T/cfgcpu.pack \
}
# Dump out the scripting required to run one test batch.
-function dump(first, pastlast, batchnum)
+function dump(first, pastlast, batchnum, affinitylist)
{
print "echo ----Start batch " batchnum ": `date` | tee -a " rd "log";
print "needqemurun="
@@ -483,6 +476,14 @@ function dump(first, pastlast, batchnum)
print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log";
print "mkdir " rd cfr[jn] " || :";
print "touch " builddir ".wait";
+ affinitylist = "";
+ if (gotcpus()) {
+ affinitylist = nextcpus(cpusr[jn]);
+ }
+ if (affinitylist ~ /^[0-9,-][0-9,-]*$/)
+ print "export TORTURE_AFFINITY=" affinitylist;
+ else
+ print "export TORTURE_AFFINITY=";
print "kvm-test-1-run.sh " CONFIGDIR cf[j], rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &"
print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log";
print "while test -f " builddir ".wait"
@@ -560,7 +561,19 @@ END {
# Dump the last batch.
if (ncpus != 0)
dump(first, i, batchnum);
-}' >> $T/script
+}
+___EOF___
+awk < $T/cfgcpu.pack \
+ -v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
+ -v CONFIGDIR="$CONFIGFRAG/" \
+ -v KVM="$KVM" \
+ -v ncpus=$cpus \
+ -v jitter="$jitter" \
+ -v rd=$resdir/$ds/ \
+ -v dur=$dur \
+ -v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
+ -v TORTURE_BOOTARGS="$TORTURE_BOOTARGS" \
+ -f $T/dumpbatches.awk >> $T/script
echo kvm-end-run-stats.sh "$resdir/$ds" "$starttime" >> $T/script
# Extract the tests and their batches from the script.
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 53ec7c046262..363f56081eff 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -53,6 +53,7 @@ do_refscale=yes
do_kvfree=yes
do_kasan=yes
do_kcsan=no
+do_clocksourcewd=yes
# doyesno - Helper function for yes/no arguments
function doyesno () {
@@ -72,6 +73,7 @@ usage () {
echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
echo " --doall"
echo " --doallmodconfig / --do-no-allmodconfig"
+ echo " --do-clocksourcewd / --do-no-clocksourcewd"
echo " --do-kasan / --do-no-kasan"
echo " --do-kcsan / --do-no-kcsan"
echo " --do-kvfree / --do-no-kvfree"
@@ -109,7 +111,7 @@ do
configs_scftorture="$configs_scftorture $2"
shift
;;
- --doall)
+ --do-all|--doall)
do_allmodconfig=yes
do_rcutorture=yes
do_locktorture=yes
@@ -119,10 +121,14 @@ do
do_kvfree=yes
do_kasan=yes
do_kcsan=yes
+ do_clocksourcewd=yes
;;
--do-allmodconfig|--do-no-allmodconfig)
do_allmodconfig=`doyesno "$1" --do-allmodconfig`
;;
+ --do-clocksourcewd|--do-no-clocksourcewd)
+ do_clocksourcewd=`doyesno "$1" --do-clocksourcewd`
+ ;;
--do-kasan|--do-no-kasan)
do_kasan=`doyesno "$1" --do-kasan`
;;
@@ -135,7 +141,7 @@ do
--do-locktorture|--do-no-locktorture)
do_locktorture=`doyesno "$1" --do-locktorture`
;;
- --do-none)
+ --do-none|--donone)
do_allmodconfig=no
do_rcutorture=no
do_locktorture=no
@@ -145,6 +151,7 @@ do
do_kvfree=no
do_kasan=no
do_kcsan=no
+ do_clocksourcewd=no
;;
--do-rcuscale|--do-no-rcuscale)
do_rcuscale=`doyesno "$1" --do-rcuscale`
@@ -279,9 +286,9 @@ function torture_one {
# torture_bootargs="[ kernel boot arguments ]"
# torture_set flavor [ kvm.sh arguments ]
#
-# Note that "flavor" is an arbitrary string. Supply --torture if needed.
-# Note that quoting is problematic. So on the command line, pass multiple
-# values with multiple kvm.sh argument instances.
+# Note that "flavor" is an arbitrary string that does not affect kvm.sh
+# in any way. So also supply --torture if you need something other than
+# the default.
function torture_set {
local cur_kcsan_kmake_args=
local kcsan_kmake_tag=
@@ -377,6 +384,22 @@ then
torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
fi
+if test "$do_clocksourcewd" = "yes"
+then
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000"
+ torture_set "clocksourcewd-1" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
+
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 clocksource.max_cswd_read_retries=1"
+ torture_set "clocksourcewd-2" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
+
+ # In case our work is already done...
+ if test "$do_rcutorture" != "yes"
+ then
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000"
+ torture_set "clocksourcewd-3" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --trust-make
+ fi
+fi
+
echo " --- " $scriptname $args
echo " --- " Done `date` | tee -a $T/log
ret=0
@@ -395,6 +418,10 @@ then
nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`"
ret=2
fi
+if test "$do_kcsan" = "yes"
+then
+ TORTURE_KCONFIG_KCSAN_ARG=1 tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh tools/testing/selftests/rcutorture/res/$ds > tools/testing/selftests/rcutorture/res/$ds/kcsan.sum
+fi
echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log
echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log
tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
index bafe94cbd739..3ca112444ce7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=2
+CONFIG_NR_CPUS=4
CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
index bafe94cbd739..3ca112444ce7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=2
+CONFIG_NR_CPUS=4
CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
index ea4399020c6c..dc02083803ce 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=2
+CONFIG_NR_CPUS=4
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/safesetid/safesetid-test.c b/tools/testing/selftests/safesetid/safesetid-test.c
index 0c4d50644c13..4b809c93ba36 100644
--- a/tools/testing/selftests/safesetid/safesetid-test.c
+++ b/tools/testing/selftests/safesetid/safesetid-test.c
@@ -152,7 +152,7 @@ static void write_policies(void)
fd = open(add_whitelist_policy_file, O_WRONLY);
if (fd < 0)
- die("cant open add_whitelist_policy file\n");
+ die("can't open add_whitelist_policy file\n");
written = write(fd, policy_str, strlen(policy_str));
if (written != strlen(policy_str)) {
if (written >= 0) {
diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c
index 63fe6521c56d..7db9cf822dc7 100644
--- a/tools/testing/selftests/sched/cs_prctl_test.c
+++ b/tools/testing/selftests/sched/cs_prctl_test.c
@@ -25,8 +25,6 @@
#include <sys/types.h>
#include <sched.h>
#include <sys/prctl.h>
-#include <sys/types.h>
-#include <sys/wait.h>
#include <unistd.h>
#include <time.h>
#include <stdio.h>
diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config
index 1ab7e8130db2..47ff5afc3727 100644
--- a/tools/testing/selftests/sync/config
+++ b/tools/testing/selftests/sync/config
@@ -1,4 +1,3 @@
CONFIG_STAGING=y
CONFIG_ANDROID=y
-CONFIG_SYNC=y
CONFIG_SW_SYNC=y
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
index 6eb4c4f97060..742f2290973e 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
@@ -417,5 +417,29 @@
"teardown": [
"$TC actions flush action skbmod"
]
+ },
+ {
+ "id": "fe09",
+ "name": "Add skbmod action to mark ECN bits",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod ecn",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 1",
+ "matchPattern": "action order [0-9]*: skbmod pipe ecn",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
new file mode 100644
index 000000000000..88a20c781e49
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
@@ -0,0 +1,137 @@
+[
+ {
+ "id": "ce7d",
+ "name": "Add mq Qdisc to multi-queue device (4 queues)",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "4",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "2f82",
+ "name": "Add mq Qdisc to multi-queue device (256 queues)",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 256\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "256",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "c525",
+ "name": "Add duplicate mq Qdisc",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: mq"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "4",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "128a",
+ "name": "Delete nonexistent mq Qdisc",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "03a9",
+ "name": "Delete mq Qdisc twice",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: mq",
+ "$TC qdisc del dev $ETH root handle 1: mq"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "be0f",
+ "name": "Add mq Qdisc to single-queue device",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
index cd4a27ee1466..ea04f04c173e 100644
--- a/tools/testing/selftests/tc-testing/tdc_config.py
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -17,6 +17,7 @@ NAMES = {
'DEV1': 'v0p1',
'DEV2': '',
'DUMMY': 'dummy1',
+ 'ETH': 'eth0',
'BATCH_FILE': './batch.txt',
'BATCH_DIR': 'tmp',
# Length of time in seconds to wait before terminating a command
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index f0fd80ef17df..b02eac613fdd 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -27,3 +27,4 @@ hmm-tests
memfd_secret
local_config.*
split_huge_page_test
+ksm_tests
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 521243770f26..d9605bd10f2d 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -45,6 +45,7 @@ TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += userfaultfd
TEST_GEN_FILES += split_huge_page_test
+TEST_GEN_FILES += ksm_tests
ifeq ($(MACHINE),x86_64)
CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
@@ -145,6 +146,8 @@ $(OUTPUT)/hmm-tests: local_config.h
# HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
$(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
+$(OUTPUT)/ksm_tests: LDLIBS += -lnuma
+
local_config.mk local_config.h: check_config.sh
/bin/sh ./check_config.sh $(CC)
diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
index 18d33684faad..fe8fcfb334e0 100644
--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
@@ -1,11 +1,14 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
set -e
if [[ $(id -u) -ne 0 ]]; then
echo "This test must be run as root. Skipping..."
- exit 0
+ exit $ksft_skip
fi
fault_limit_file=limit_in_bytes
diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
index d11d1febccc3..4a9a3afe9fd4 100644
--- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
@@ -1,11 +1,14 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
set -e
if [[ $(id -u) -ne 0 ]]; then
echo "This test must be run as root. Skipping..."
- exit 0
+ exit $ksft_skip
fi
usage_file=usage_in_bytes
diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c
new file mode 100644
index 000000000000..b61dcdb44c5b
--- /dev/null
+++ b/tools/testing/selftests/vm/ksm_tests.c
@@ -0,0 +1,662 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/mman.h>
+#include <stdbool.h>
+#include <time.h>
+#include <string.h>
+#include <numa.h>
+
+#include "../kselftest.h"
+#include "../../../../include/vdso/time64.h"
+
+#define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/"
+#define KSM_FP(s) (KSM_SYSFS_PATH s)
+#define KSM_SCAN_LIMIT_SEC_DEFAULT 120
+#define KSM_PAGE_COUNT_DEFAULT 10l
+#define KSM_PROT_STR_DEFAULT "rw"
+#define KSM_USE_ZERO_PAGES_DEFAULT false
+#define KSM_MERGE_ACROSS_NODES_DEFAULT true
+#define MB (1ul << 20)
+
+struct ksm_sysfs {
+ unsigned long max_page_sharing;
+ unsigned long merge_across_nodes;
+ unsigned long pages_to_scan;
+ unsigned long run;
+ unsigned long sleep_millisecs;
+ unsigned long stable_node_chains_prune_millisecs;
+ unsigned long use_zero_pages;
+};
+
+enum ksm_test_name {
+ CHECK_KSM_MERGE,
+ CHECK_KSM_UNMERGE,
+ CHECK_KSM_ZERO_PAGE_MERGE,
+ CHECK_KSM_NUMA_MERGE,
+ KSM_MERGE_TIME,
+ KSM_COW_TIME
+};
+
+static int ksm_write_sysfs(const char *file_path, unsigned long val)
+{
+ FILE *f = fopen(file_path, "w");
+
+ if (!f) {
+ fprintf(stderr, "f %s\n", file_path);
+ perror("fopen");
+ return 1;
+ }
+ if (fprintf(f, "%lu", val) < 0) {
+ perror("fprintf");
+ return 1;
+ }
+ fclose(f);
+
+ return 0;
+}
+
+static int ksm_read_sysfs(const char *file_path, unsigned long *val)
+{
+ FILE *f = fopen(file_path, "r");
+
+ if (!f) {
+ fprintf(stderr, "f %s\n", file_path);
+ perror("fopen");
+ return 1;
+ }
+ if (fscanf(f, "%lu", val) != 1) {
+ perror("fscanf");
+ return 1;
+ }
+ fclose(f);
+
+ return 0;
+}
+
+static int str_to_prot(char *prot_str)
+{
+ int prot = 0;
+
+ if ((strchr(prot_str, 'r')) != NULL)
+ prot |= PROT_READ;
+ if ((strchr(prot_str, 'w')) != NULL)
+ prot |= PROT_WRITE;
+ if ((strchr(prot_str, 'x')) != NULL)
+ prot |= PROT_EXEC;
+
+ return prot;
+}
+
+static void print_help(void)
+{
+ printf("usage: ksm_tests [-h] <test type> [-a prot] [-p page_count] [-l timeout]\n"
+ "[-z use_zero_pages] [-m merge_across_nodes] [-s size]\n");
+
+ printf("Supported <test type>:\n"
+ " -M (page merging)\n"
+ " -Z (zero pages merging)\n"
+ " -N (merging of pages in different NUMA nodes)\n"
+ " -U (page unmerging)\n"
+ " -P evaluate merging time and speed.\n"
+ " For this test, the size of duplicated memory area (in MiB)\n"
+ " must be provided using -s option\n"
+ " -C evaluate the time required to break COW of merged pages.\n\n");
+
+ printf(" -a: specify the access protections of pages.\n"
+ " <prot> must be of the form [rwx].\n"
+ " Default: %s\n", KSM_PROT_STR_DEFAULT);
+ printf(" -p: specify the number of pages to test.\n"
+ " Default: %ld\n", KSM_PAGE_COUNT_DEFAULT);
+ printf(" -l: limit the maximum running time (in seconds) for a test.\n"
+ " Default: %d seconds\n", KSM_SCAN_LIMIT_SEC_DEFAULT);
+ printf(" -z: change use_zero_pages tunable\n"
+ " Default: %d\n", KSM_USE_ZERO_PAGES_DEFAULT);
+ printf(" -m: change merge_across_nodes tunable\n"
+ " Default: %d\n", KSM_MERGE_ACROSS_NODES_DEFAULT);
+ printf(" -s: the size of duplicated memory area (in MiB)\n");
+
+ exit(0);
+}
+
+static void *allocate_memory(void *ptr, int prot, int mapping, char data, size_t map_size)
+{
+ void *map_ptr = mmap(ptr, map_size, PROT_WRITE, mapping, -1, 0);
+
+ if (!map_ptr) {
+ perror("mmap");
+ return NULL;
+ }
+ memset(map_ptr, data, map_size);
+ if (mprotect(map_ptr, map_size, prot)) {
+ perror("mprotect");
+ munmap(map_ptr, map_size);
+ return NULL;
+ }
+
+ return map_ptr;
+}
+
+static int ksm_do_scan(int scan_count, struct timespec start_time, int timeout)
+{
+ struct timespec cur_time;
+ unsigned long cur_scan, init_scan;
+
+ if (ksm_read_sysfs(KSM_FP("full_scans"), &init_scan))
+ return 1;
+ cur_scan = init_scan;
+
+ while (cur_scan < init_scan + scan_count) {
+ if (ksm_read_sysfs(KSM_FP("full_scans"), &cur_scan))
+ return 1;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time)) {
+ perror("clock_gettime");
+ return 1;
+ }
+ if ((cur_time.tv_sec - start_time.tv_sec) > timeout) {
+ printf("Scan time limit exceeded\n");
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int ksm_merge_pages(void *addr, size_t size, struct timespec start_time, int timeout)
+{
+ if (madvise(addr, size, MADV_MERGEABLE)) {
+ perror("madvise");
+ return 1;
+ }
+ if (ksm_write_sysfs(KSM_FP("run"), 1))
+ return 1;
+
+ /* Since merging occurs only after 2 scans, make sure to get at least 2 full scans */
+ if (ksm_do_scan(2, start_time, timeout))
+ return 1;
+
+ return 0;
+}
+
+static bool assert_ksm_pages_count(long dupl_page_count)
+{
+ unsigned long max_page_sharing, pages_sharing, pages_shared;
+
+ if (ksm_read_sysfs(KSM_FP("pages_shared"), &pages_shared) ||
+ ksm_read_sysfs(KSM_FP("pages_sharing"), &pages_sharing) ||
+ ksm_read_sysfs(KSM_FP("max_page_sharing"), &max_page_sharing))
+ return false;
+
+ /*
+ * Since there must be at least 2 pages for merging and 1 page can be
+ * shared with the limited number of pages (max_page_sharing), sometimes
+ * there are 'leftover' pages that cannot be merged. For example, if there
+ * are 11 pages and max_page_sharing = 10, then only 10 pages will be
+ * merged and the 11th page won't be affected. As a result, when the number
+ * of duplicate pages is divided by max_page_sharing and the remainder is 1,
+ * pages_shared and pages_sharing values will be equal between dupl_page_count
+ * and dupl_page_count - 1.
+ */
+ if (dupl_page_count % max_page_sharing == 1 || dupl_page_count % max_page_sharing == 0) {
+ if (pages_shared == dupl_page_count / max_page_sharing &&
+ pages_sharing == pages_shared * (max_page_sharing - 1))
+ return true;
+ } else {
+ if (pages_shared == (dupl_page_count / max_page_sharing + 1) &&
+ pages_sharing == dupl_page_count - pages_shared)
+ return true;
+ }
+
+ return false;
+}
+
+static int ksm_save_def(struct ksm_sysfs *ksm_sysfs)
+{
+ if (ksm_read_sysfs(KSM_FP("max_page_sharing"), &ksm_sysfs->max_page_sharing) ||
+ ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) ||
+ ksm_read_sysfs(KSM_FP("sleep_millisecs"), &ksm_sysfs->sleep_millisecs) ||
+ ksm_read_sysfs(KSM_FP("pages_to_scan"), &ksm_sysfs->pages_to_scan) ||
+ ksm_read_sysfs(KSM_FP("run"), &ksm_sysfs->run) ||
+ ksm_read_sysfs(KSM_FP("stable_node_chains_prune_millisecs"),
+ &ksm_sysfs->stable_node_chains_prune_millisecs) ||
+ ksm_read_sysfs(KSM_FP("use_zero_pages"), &ksm_sysfs->use_zero_pages))
+ return 1;
+
+ return 0;
+}
+
+static int ksm_restore(struct ksm_sysfs *ksm_sysfs)
+{
+ if (ksm_write_sysfs(KSM_FP("max_page_sharing"), ksm_sysfs->max_page_sharing) ||
+ ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) ||
+ ksm_write_sysfs(KSM_FP("pages_to_scan"), ksm_sysfs->pages_to_scan) ||
+ ksm_write_sysfs(KSM_FP("run"), ksm_sysfs->run) ||
+ ksm_write_sysfs(KSM_FP("sleep_millisecs"), ksm_sysfs->sleep_millisecs) ||
+ ksm_write_sysfs(KSM_FP("stable_node_chains_prune_millisecs"),
+ ksm_sysfs->stable_node_chains_prune_millisecs) ||
+ ksm_write_sysfs(KSM_FP("use_zero_pages"), ksm_sysfs->use_zero_pages))
+ return 1;
+
+ return 0;
+}
+
+static int check_ksm_merge(int mapping, int prot, long page_count, int timeout, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ /* fill pages with the same data and merge them */
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ /* verify that the right number of pages are merged */
+ if (assert_ksm_pages_count(page_count)) {
+ printf("OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_PASS;
+ }
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+static int check_ksm_unmerge(int mapping, int prot, int timeout, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time;
+ int page_count = 2;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ /* fill pages with the same data and merge them */
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ /* change 1 byte in each of the 2 pages -- KSM must automatically unmerge them */
+ memset(map_ptr, '-', 1);
+ memset(map_ptr + page_size, '+', 1);
+
+ /* get at least 1 scan, so KSM can detect that the pages were modified */
+ if (ksm_do_scan(1, start_time, timeout))
+ goto err_out;
+
+ /* check that unmerging was successful and 0 pages are currently merged */
+ if (assert_ksm_pages_count(0)) {
+ printf("OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_PASS;
+ }
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+static int check_ksm_zero_page_merge(int mapping, int prot, long page_count, int timeout,
+ bool use_zero_pages, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ if (ksm_write_sysfs(KSM_FP("use_zero_pages"), use_zero_pages))
+ return KSFT_FAIL;
+
+ /* fill pages with zero and try to merge them */
+ map_ptr = allocate_memory(NULL, prot, mapping, 0, page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ /*
+ * verify that the right number of pages are merged:
+ * 1) if use_zero_pages is set to 1, empty pages are merged
+ * with the kernel zero page instead of with each other;
+ * 2) if use_zero_pages is set to 0, empty pages are not treated specially
+ * and merged as usual.
+ */
+ if (use_zero_pages && !assert_ksm_pages_count(0))
+ goto err_out;
+ else if (!use_zero_pages && !assert_ksm_pages_count(page_count))
+ goto err_out;
+
+ printf("OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_across_nodes,
+ size_t page_size)
+{
+ void *numa1_map_ptr, *numa2_map_ptr;
+ struct timespec start_time;
+ int page_count = 2;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ if (numa_available() < 0) {
+ perror("NUMA support not enabled");
+ return KSFT_SKIP;
+ }
+ if (numa_max_node() < 1) {
+ printf("At least 2 NUMA nodes must be available\n");
+ return KSFT_SKIP;
+ }
+ if (ksm_write_sysfs(KSM_FP("merge_across_nodes"), merge_across_nodes))
+ return KSFT_FAIL;
+
+ /* allocate 2 pages in 2 different NUMA nodes and fill them with the same data */
+ numa1_map_ptr = numa_alloc_onnode(page_size, 0);
+ numa2_map_ptr = numa_alloc_onnode(page_size, 1);
+ if (!numa1_map_ptr || !numa2_map_ptr) {
+ perror("numa_alloc_onnode");
+ return KSFT_FAIL;
+ }
+
+ memset(numa1_map_ptr, '*', page_size);
+ memset(numa2_map_ptr, '*', page_size);
+
+ /* try to merge the pages */
+ if (ksm_merge_pages(numa1_map_ptr, page_size, start_time, timeout) ||
+ ksm_merge_pages(numa2_map_ptr, page_size, start_time, timeout))
+ goto err_out;
+
+ /*
+ * verify that the right number of pages are merged:
+ * 1) if merge_across_nodes was enabled, 2 duplicate pages will be merged;
+ * 2) if merge_across_nodes = 0, there must be 0 merged pages, since there is
+ * only 1 unique page in each node and they can't be shared.
+ */
+ if (merge_across_nodes && !assert_ksm_pages_count(page_count))
+ goto err_out;
+ else if (!merge_across_nodes && !assert_ksm_pages_count(0))
+ goto err_out;
+
+ numa_free(numa1_map_ptr, page_size);
+ numa_free(numa2_map_ptr, page_size);
+ printf("OK\n");
+ return KSFT_PASS;
+
+err_out:
+ numa_free(numa1_map_ptr, page_size);
+ numa_free(numa2_map_ptr, page_size);
+ printf("Not OK\n");
+ return KSFT_FAIL;
+}
+
+static int ksm_merge_time(int mapping, int prot, int timeout, size_t map_size)
+{
+ void *map_ptr;
+ struct timespec start_time, end_time;
+ unsigned long scan_time_ns;
+
+ map_size *= MB;
+
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', map_size);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ if (ksm_merge_pages(map_ptr, map_size, start_time, timeout))
+ goto err_out;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+
+ scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Total size: %lu MiB\n", map_size / MB);
+ printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
+ scan_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n", (map_size / MB) /
+ ((double)scan_time_ns / NSEC_PER_SEC));
+
+ munmap(map_ptr, map_size);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, map_size);
+ return KSFT_FAIL;
+}
+
+static int ksm_cow_time(int mapping, int prot, int timeout, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time, end_time;
+ unsigned long cow_time_ns;
+
+ /* page_count must be less than 2*page_size */
+ size_t page_count = 4000;
+
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+ for (size_t i = 0; i < page_count - 1; i = i + 2)
+ memset(map_ptr + page_size * i, '-', 1);
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Total size: %lu MiB\n\n", (page_size * page_count) / MB);
+ printf("Not merged pages:\n");
+ printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
+ cow_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n\n", ((page_size * (page_count / 2)) / MB) /
+ ((double)cow_time_ns / NSEC_PER_SEC));
+
+ /* Create 2000 pairs of duplicate pages */
+ for (size_t i = 0; i < page_count - 1; i = i + 2) {
+ memset(map_ptr + page_size * i, '+', i / 2 + 1);
+ memset(map_ptr + page_size * (i + 1), '+', i / 2 + 1);
+ }
+ if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ for (size_t i = 0; i < page_count - 1; i = i + 2)
+ memset(map_ptr + page_size * i, '-', 1);
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+
+ cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Merged pages:\n");
+ printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
+ cow_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n", ((page_size * (page_count / 2)) / MB) /
+ ((double)cow_time_ns / NSEC_PER_SEC));
+
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret, opt;
+ int prot = 0;
+ int ksm_scan_limit_sec = KSM_SCAN_LIMIT_SEC_DEFAULT;
+ long page_count = KSM_PAGE_COUNT_DEFAULT;
+ size_t page_size = sysconf(_SC_PAGESIZE);
+ struct ksm_sysfs ksm_sysfs_old;
+ int test_name = CHECK_KSM_MERGE;
+ bool use_zero_pages = KSM_USE_ZERO_PAGES_DEFAULT;
+ bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT;
+ long size_MB = 0;
+
+ while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPC")) != -1) {
+ switch (opt) {
+ case 'a':
+ prot = str_to_prot(optarg);
+ break;
+ case 'p':
+ page_count = atol(optarg);
+ if (page_count <= 0) {
+ printf("The number of pages must be greater than 0\n");
+ return KSFT_FAIL;
+ }
+ break;
+ case 'l':
+ ksm_scan_limit_sec = atoi(optarg);
+ if (ksm_scan_limit_sec <= 0) {
+ printf("Timeout value must be greater than 0\n");
+ return KSFT_FAIL;
+ }
+ break;
+ case 'h':
+ print_help();
+ break;
+ case 'z':
+ if (strcmp(optarg, "0") == 0)
+ use_zero_pages = 0;
+ else
+ use_zero_pages = 1;
+ break;
+ case 'm':
+ if (strcmp(optarg, "0") == 0)
+ merge_across_nodes = 0;
+ else
+ merge_across_nodes = 1;
+ break;
+ case 's':
+ size_MB = atoi(optarg);
+ if (size_MB <= 0) {
+ printf("Size must be greater than 0\n");
+ return KSFT_FAIL;
+ }
+ case 'M':
+ break;
+ case 'U':
+ test_name = CHECK_KSM_UNMERGE;
+ break;
+ case 'Z':
+ test_name = CHECK_KSM_ZERO_PAGE_MERGE;
+ break;
+ case 'N':
+ test_name = CHECK_KSM_NUMA_MERGE;
+ break;
+ case 'P':
+ test_name = KSM_MERGE_TIME;
+ break;
+ case 'C':
+ test_name = KSM_COW_TIME;
+ break;
+ default:
+ return KSFT_FAIL;
+ }
+ }
+
+ if (prot == 0)
+ prot = str_to_prot(KSM_PROT_STR_DEFAULT);
+
+ if (access(KSM_SYSFS_PATH, F_OK)) {
+ printf("Config KSM not enabled\n");
+ return KSFT_SKIP;
+ }
+
+ if (ksm_save_def(&ksm_sysfs_old)) {
+ printf("Cannot save default tunables\n");
+ return KSFT_FAIL;
+ }
+
+ if (ksm_write_sysfs(KSM_FP("run"), 2) ||
+ ksm_write_sysfs(KSM_FP("sleep_millisecs"), 0) ||
+ ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) ||
+ ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count))
+ return KSFT_FAIL;
+
+ switch (test_name) {
+ case CHECK_KSM_MERGE:
+ ret = check_ksm_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count,
+ ksm_scan_limit_sec, page_size);
+ break;
+ case CHECK_KSM_UNMERGE:
+ ret = check_ksm_unmerge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
+ page_size);
+ break;
+ case CHECK_KSM_ZERO_PAGE_MERGE:
+ ret = check_ksm_zero_page_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count,
+ ksm_scan_limit_sec, use_zero_pages, page_size);
+ break;
+ case CHECK_KSM_NUMA_MERGE:
+ ret = check_ksm_numa_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
+ merge_across_nodes, page_size);
+ break;
+ case KSM_MERGE_TIME:
+ if (size_MB == 0) {
+ printf("Option '-s' is required.\n");
+ return KSFT_FAIL;
+ }
+ ret = ksm_merge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
+ size_MB);
+ break;
+ case KSM_COW_TIME:
+ ret = ksm_cow_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
+ page_size);
+ break;
+ }
+
+ if (ksm_restore(&ksm_sysfs_old)) {
+ printf("Cannot restore default tunables\n");
+ return KSFT_FAIL;
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/vm/mlock-random-test.c b/tools/testing/selftests/vm/mlock-random-test.c
index ff4d72eb74b9..782ea94dee2f 100644
--- a/tools/testing/selftests/vm/mlock-random-test.c
+++ b/tools/testing/selftests/vm/mlock-random-test.c
@@ -70,7 +70,7 @@ int get_proc_locked_vm_size(void)
}
}
- perror("cann't parse VmLck in /proc/self/status\n");
+ perror("cannot parse VmLck in /proc/self/status\n");
fclose(f);
return -1;
}
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index d09a6b71f1e9..45e803af7c77 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -377,6 +377,102 @@ else
exitcode=1
fi
+echo "-------------------------------------------------------"
+echo "running KSM MADV_MERGEABLE test with 10 identical pages"
+echo "-------------------------------------------------------"
+./ksm_tests -M -p 10
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
+echo "------------------------"
+echo "running KSM unmerge test"
+echo "------------------------"
+./ksm_tests -U
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
+echo "----------------------------------------------------------"
+echo "running KSM test with 10 zero pages and use_zero_pages = 0"
+echo "----------------------------------------------------------"
+./ksm_tests -Z -p 10 -z 0
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
+echo "----------------------------------------------------------"
+echo "running KSM test with 10 zero pages and use_zero_pages = 1"
+echo "----------------------------------------------------------"
+./ksm_tests -Z -p 10 -z 1
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
+echo "-------------------------------------------------------------"
+echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 1"
+echo "-------------------------------------------------------------"
+./ksm_tests -N -m 1
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
+echo "-------------------------------------------------------------"
+echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 0"
+echo "-------------------------------------------------------------"
+./ksm_tests -N -m 0
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
exit $exitcode
exit $exitcode
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 2ea438e6b8b1..10ab56c2484a 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -566,6 +566,18 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
}
}
+static void wake_range(int ufd, unsigned long addr, unsigned long len)
+{
+ struct uffdio_range uffdio_wake;
+
+ uffdio_wake.start = addr;
+ uffdio_wake.len = len;
+
+ if (ioctl(ufd, UFFDIO_WAKE, &uffdio_wake))
+ fprintf(stderr, "error waking %lu\n",
+ addr), exit(1);
+}
+
static int __copy_page(int ufd, unsigned long offset, bool retry)
{
struct uffdio_copy uffdio_copy;
@@ -585,6 +597,7 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
if (uffdio_copy.copy != -EEXIST)
err("UFFDIO_COPY error: %"PRId64,
(int64_t)uffdio_copy.copy);
+ wake_range(ufd, uffdio_copy.dst, page_size);
} else if (uffdio_copy.copy != page_size) {
err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy);
} else {
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
index 6da0ac3f0135..cc3de6ff9fba 100644
--- a/tools/testing/selftests/x86/mov_ss_trap.c
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -47,7 +47,6 @@
unsigned short ss;
extern unsigned char breakpoint_insn[];
sigjmp_buf jmpbuf;
-static unsigned char altstack_data[SIGSTKSZ];
static void enable_watchpoint(void)
{
@@ -250,13 +249,14 @@ int main()
if (sigsetjmp(jmpbuf, 1) == 0) {
printf("[RUN]\tMOV SS; SYSENTER\n");
stack_t stack = {
- .ss_sp = altstack_data,
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
err(1, "sigaltstack");
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
nr = SYS_getpid;
+ free(stack.ss_sp);
/* Clear EBP first to make sure we segfault cleanly. */
asm volatile ("xorl %%ebp, %%ebp; mov %[ss], %%ss; SYSENTER" : "+a" (nr)
: [ss] "m" (ss) : "flags", "rcx"
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 57c4f67f16ef..5d7961a5f7f6 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -138,9 +138,6 @@ static unsigned short LDT3(int idx)
return (idx << 3) | 7;
}
-/* Our sigaltstack scratch space. */
-static char altstack_data[SIGSTKSZ];
-
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -771,7 +768,8 @@ int main()
setup_ldt();
stack_t stack = {
- .ss_sp = altstack_data,
+ /* Our sigaltstack scratch space. */
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
@@ -872,5 +870,6 @@ int main()
total_nerrs += test_nonstrict_ss();
#endif
+ free(stack.ss_sp);
return total_nerrs ? 1 : 0;
}
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 120ac741fe44..9a30f443e928 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -57,7 +57,6 @@ static void clearhandler(int sig)
static volatile sig_atomic_t sig_traps, sig_eflags;
sigjmp_buf jmpbuf;
-static unsigned char altstack_data[SIGSTKSZ];
#ifdef __x86_64__
# define REG_IP REG_RIP
@@ -210,7 +209,7 @@ int main()
unsigned long nr = SYS_getpid;
printf("[RUN]\tSet TF and check SYSENTER\n");
stack_t stack = {
- .ss_sp = altstack_data,
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
@@ -219,6 +218,7 @@ int main()
SA_RESETHAND | SA_ONSTACK);
sethandler(SIGILL, print_and_longjmp, SA_RESETHAND);
set_eflags(get_eflags() | X86_EFLAGS_TF);
+ free(stack.ss_sp);
/* Clear EBP first to make sure we segfault cleanly. */
asm volatile ("xorl %%ebp, %%ebp; SYSENTER" : "+a" (nr) :: "flags", "rcx"
#ifdef __x86_64__
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index bff474b5efc6..461fa41a4d02 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -17,9 +17,6 @@
#include "helpers.h"
-/* Our sigaltstack scratch space. */
-static unsigned char altstack_data[SIGSTKSZ];
-
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -104,7 +101,8 @@ static void sigill(int sig, siginfo_t *info, void *ctx_void)
int main()
{
stack_t stack = {
- .ss_sp = altstack_data,
+ /* Our sigaltstack scratch space. */
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
@@ -233,5 +231,6 @@ int main()
set_eflags(get_eflags() & ~X86_EFLAGS_TF);
#endif
+ free(stack.ss_sp);
return 0;
}