aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools/testing/selftests
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests')
-rw-r--r--tools/testing/selftests/Makefile3
-rw-r--r--tools/testing/selftests/arm64/fp/.gitignore2
-rw-r--r--tools/testing/selftests/arm64/fp/Makefile11
-rw-r--r--tools/testing/selftests/arm64/fp/TODO4
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl-sve.c14
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl.S10
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl.h8
-rw-r--r--tools/testing/selftests/arm64/fp/sve-probe-vls.c7
-rw-r--r--tools/testing/selftests/arm64/fp/vec-syscfg.c593
-rw-r--r--tools/testing/selftests/arm64/mte/.gitignore1
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.c2
-rw-r--r--tools/testing/selftests/arm64/pauth/pac.c10
-rw-r--r--tools/testing/selftests/arm64/signal/.gitignore1
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals.h2
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.c10
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/TODO2
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c92
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sve_regs.c126
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sve_vl.c68
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.c34
-rw-r--r--tools/testing/selftests/bpf/.gitignore5
-rw-r--r--tools/testing/selftests/bpf/Makefile29
-rw-r--r--tools/testing/selftests/bpf/Makefile.docs3
-rw-r--r--tools/testing/selftests/bpf/README.rst26
-rw-r--r--tools/testing/selftests/bpf/bench.c1
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_rename.c2
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c6
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c2
-rw-r--r--tools/testing/selftests/bpf/bpf_tcp_helpers.h19
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c137
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h16
-rw-r--r--tools/testing/selftests/bpf/netcnt_common.h38
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c147
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/atomics.c72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c110
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c254
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c226
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c114
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c97
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c623
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_module.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_write.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c84
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_link.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c79
-rw-r--r--tools/testing/selftests/bpf/prog_tests/check_mtu.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_autosize.c22
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_sleep.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/hashmap.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfree_skb.c19
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_btf.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_module.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/link_pinning.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c288
-rw-r--r--tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c559
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netcnt.c82
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netns_cookie.c80
-rw-r--r--tools/testing/selftests/bpf/prog_tests/obj_name.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_branches.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_buffer.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_link.c89
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/probe_user.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rdonly_maps.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reference_tracking.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/resolve_btfids.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/select_reuseport.c53
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c66
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_lookup.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skeleton.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_fields.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c431
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c70
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/static_linked.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/syscall.c55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tailcalls.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_pt_regs.c46
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_bpf.c395
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_overhead.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_mim.c77
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_printk.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trampoline_count.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/udp_limit.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bonding.c574
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c105
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c39
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_link.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp_release.c26
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter.h8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_netlink.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c72
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_file.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c3
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp4.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp6.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_unix.c80
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h10
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_dropper.c26
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_ip_test.c84
-rw-r--r--tools/testing/selftests/bpf/progs/kfree_skb.c4
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c4
-rw-r--r--tools/testing/selftests/bpf/progs/linked_maps1.c2
-rw-r--r--tools/testing/selftests/bpf/progs/netcnt_prog.c8
-rw-r--r--tools/testing/selftests/bpf/progs/netns_cookie_prog.c84
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c39
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_sk.c16
-rw-r--r--tools/testing/selftests/bpf/progs/syscall.c121
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall3.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall4.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall5.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_cookie.c85
-rw-r--r--tools/testing/selftests/bpf/progs/test_check_mtu.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_autosize.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func_args.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_weak.c56
-rw-r--r--tools/testing/selftests/bpf/progs/test_lookup_and_delete.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_migrate_reuseport.c135
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_link.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_rdonly_maps.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_skeleton.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf_single.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_listen.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_static_linked1.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_static_linked2.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_subprogs.c13
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_pt_regs.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_bpf.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c20
-rw-r--r--tools/testing/selftests/bpf/progs/timer.c297
-rw-r--r--tools/testing/selftests/bpf/progs/timer_mim.c88
-rw-r--r--tools/testing/selftests/bpf/progs/timer_mim_reject.c74
-rw-r--r--tools/testing/selftests/bpf/progs/trace_printk.c6
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c94
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_tx.c2
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool.sh6
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh2
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_synctypes.py586
-rwxr-xr-xtools/testing/selftests/bpf/test_doc_build.sh11
-rw-r--r--tools/testing/selftests/bpf/test_lru_map.c8
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_ip_encap.sh13
-rw-r--r--tools/testing/selftests/bpf/test_maps.c275
-rw-r--r--tools/testing/selftests/bpf/test_netcnt.c148
-rw-r--r--tools/testing/selftests/bpf/test_progs.c110
-rw-r--r--tools/testing/selftests/bpf/test_progs.h21
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh2
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c7
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect_multi.sh204
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_veth.sh2
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh10
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c87
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h4
-rw-r--r--tools/testing/selftests/bpf/verifier/dead_code.c12
-rw-r--r--tools/testing/selftests/bpf/verifier/value_ptr_arith.c229
-rw-r--r--tools/testing/selftests/bpf/xdp_redirect_multi.c226
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.c681
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.h63
-rwxr-xr-xtools/testing/selftests/bpf/xsk_prereqs.sh30
-rw-r--r--tools/testing/selftests/cgroup/.gitignore3
-rw-r--r--tools/testing/selftests/cgroup/Makefile2
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c51
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.h2
-rw-r--r--tools/testing/selftests/cgroup/test_freezer.c57
-rw-r--r--tools/testing/selftests/cgroup/test_kill.c297
-rw-r--r--tools/testing/selftests/cpufreq/config2
-rw-r--r--tools/testing/selftests/damon/Makefile7
-rw-r--r--tools/testing/selftests/damon/_chk_dependency.sh28
-rw-r--r--tools/testing/selftests/damon/debugfs_attrs.sh75
-rw-r--r--tools/testing/selftests/drivers/dma-buf/udmabuf.c5
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh3
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh3
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/port_scale.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh69
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh14
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh24
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/router_scale.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_sample.sh12
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh167
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh14
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/fib.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/nexthop.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/psample.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh2
-rw-r--r--tools/testing/selftests/filesystems/binderfs/binderfs_test.c17
-rw-r--r--tools/testing/selftests/firmware/fw_namespace.c3
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc90
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc38
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions24
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc53
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc18
-rw-r--r--tools/testing/selftests/futex/functional/.gitignore2
-rw-r--r--tools/testing/selftests/futex/functional/Makefile7
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue.c136
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait.c171
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_timeout.c126
-rwxr-xr-xtools/testing/selftests/futex/functional/run.sh6
-rw-r--r--tools/testing/selftests/kvm/.gitignore11
-rw-r--r--tools/testing/selftests/kvm/Makefile19
-rw-r--r--tools/testing/selftests/kvm/aarch64/debug-exceptions.c250
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c3
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c440
-rw-r--r--tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c121
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c425
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c18
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c114
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c9
-rw-r--r--tools/testing/selftests/kvm/hardware_disable_test.c2
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h86
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h47
-rw-r--r--tools/testing/selftests/kvm/include/perf_test_util.h2
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h7
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/apic.h91
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/evmcs.h (renamed from tools/testing/selftests/kvm/include/evmcs.h)2
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/hyperv.h188
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h100
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h11
-rw-r--r--tools/testing/selftests/kvm/kvm_binary_stats_test.c249
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c9
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/handlers.S126
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c131
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/ucall.c2
-rw-r--r--tools/testing/selftests/kvm/lib/elf.c6
-rw-r--r--tools/testing/selftests/kvm/lib/guest_modes.c16
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c83
-rw-r--r--tools/testing/selftests/kvm/lib/perf_test_util.c24
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/processor.c17
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c39
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/apic.c45
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c352
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/svm.c9
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c52
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c3
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c2
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c286
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c5
-rw-r--r--tools/testing/selftests/kvm/steal_time.c26
-rw-r--r--tools/testing/selftests/kvm/x86_64/debug_regs.c24
-rw-r--r--tools/testing/selftests/kvm/x86_64/emulator_error_test.c219
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c79
-rw-r--r--tools/testing/selftests/kvm/x86_64/get_cpuid_test.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_clock.c12
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_features.c684
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_pv_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/mmio_warning_test.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/mmu_role_test.c147
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c10
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c74
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c128
-rw-r--r--tools/testing/selftests/kvm/x86_64/sync_regs_test.c7
-rw-r--r--tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c9
-rw-r--r--tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c10
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c242
-rw-r--r--tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c65
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c17
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c2
-rw-r--r--tools/testing/selftests/lib.mk2
-rw-r--r--tools/testing/selftests/lib/Makefile2
-rw-r--r--tools/testing/selftests/lib/config1
-rwxr-xr-xtools/testing/selftests/lib/scanf.sh4
-rw-r--r--tools/testing/selftests/lkdtm/config9
-rwxr-xr-xtools/testing/selftests/lkdtm/run.sh12
-rwxr-xr-xtools/testing/selftests/lkdtm/stack-entropy.sh1
-rw-r--r--tools/testing/selftests/lkdtm/tests.txt14
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c2
-rwxr-xr-xtools/testing/selftests/memory-hotplug/mem-on-off-test.sh4
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c88
-rw-r--r--tools/testing/selftests/move_mount_set_group/.gitignore1
-rw-r--r--tools/testing/selftests/move_mount_set_group/Makefile7
-rw-r--r--tools/testing/selftests/move_mount_set_group/config1
-rw-r--r--tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c375
-rw-r--r--tools/testing/selftests/nci/nci_dev.c416
-rw-r--r--tools/testing/selftests/net/.gitignore1
-rw-r--r--tools/testing/selftests/net/Makefile8
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c438
-rwxr-xr-xtools/testing/selftests/net/altnames.sh2
-rw-r--r--tools/testing/selftests/net/config3
-rwxr-xr-xtools/testing/selftests/net/devlink_port_split.py8
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh96
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh12
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh7
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile1
-rwxr-xr-xtools/testing/selftests/net/forwarding/custom_multipath_hash.sh364
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh45
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh456
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh172
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh458
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_dsfield.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_l4port.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/skbedit_priority.sh2
-rwxr-xr-xtools/testing/selftests/net/gre_gso.sh236
-rw-r--r--tools/testing/selftests/net/gro.c1095
-rwxr-xr-xtools/testing/selftests/net/gro.sh99
-rwxr-xr-xtools/testing/selftests/net/icmp_redirect.sh13
-rwxr-xr-xtools/testing/selftests/net/ioam6.sh668
-rw-r--r--tools/testing/selftests/net/ioam6_parser.c676
-rw-r--r--tools/testing/selftests/net/ipsec.c165
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c125
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh65
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh517
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh4
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c16
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh7
-rw-r--r--tools/testing/selftests/net/nettest.c83
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh212
-rw-r--r--tools/testing/selftests/net/psock_fanout.c4
-rwxr-xr-xtools/testing/selftests/net/psock_snd.sh3
-rwxr-xr-xtools/testing/selftests/net/run_afpackettests5
-rwxr-xr-xtools/testing/selftests/net/setup_loopback.sh118
-rw-r--r--tools/testing/selftests/net/setup_veth.sh41
-rw-r--r--tools/testing/selftests/net/so_netns_cookie.c61
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh576
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh9
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh9
-rw-r--r--tools/testing/selftests/net/timestamping.c55
-rw-r--r--tools/testing/selftests/net/tls.c90
-rw-r--r--tools/testing/selftests/net/toeplitz.c585
-rwxr-xr-xtools/testing/selftests/net/toeplitz.sh199
-rwxr-xr-xtools/testing/selftests/net/toeplitz_client.sh28
-rwxr-xr-xtools/testing/selftests/net/unicast_extensions.sh22
-rwxr-xr-xtools/testing/selftests/net/veth.sh183
-rwxr-xr-xtools/testing/selftests/net/vrf_strict_mode_test.sh9
-rw-r--r--tools/testing/selftests/netfilter/Makefile2
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh167
-rwxr-xr-xtools/testing/selftests/netfilter/nft_flowtable.sh1
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat.sh145
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat_zones.sh309
-rwxr-xr-xtools/testing/selftests/netfilter/nft_zones_many.sh156
-rw-r--r--tools/testing/selftests/openat2/openat2_test.c11
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/null_syscall.c3
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/Makefile4
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c17
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.h2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c63
l---------tools/testing/selftests/powerpc/primitives/asm/extable.h1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c1
-rw-r--r--tools/testing/selftests/powerpc/security/Makefile2
-rwxr-xr-xtools/testing/selftests/powerpc/security/mitigation-patching.sh75
-rw-r--r--tools/testing/selftests/powerpc/signal/signal_tm.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-exec.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-fork.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-poison.c2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-resched-dscr.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-stack.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-sigreturn.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-syscall-asm.S37
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-syscall.c38
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-tar.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-tmspr.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-trap.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-unavailable.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-vmxcopy.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm.h36
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh10
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kcsan-collapse.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-again.sh37
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh106
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-build.sh6
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh40
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh88
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh5
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh30
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote.sh261
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh24
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh49
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh98
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh39
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST17
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot8
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/RUDE012
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS012
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS032
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TREE2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TREE542
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT2
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/PREEMPT2
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h2
-rw-r--r--tools/testing/selftests/resctrl/README2
-rw-r--r--tools/testing/selftests/resctrl/resctrl_tests.c4
-rw-r--r--tools/testing/selftests/rlimits/.gitignore2
-rw-r--r--tools/testing/selftests/rlimits/Makefile6
-rw-r--r--tools/testing/selftests/rlimits/config1
-rw-r--r--tools/testing/selftests/rlimits/rlimits-per-userns.c161
-rw-r--r--tools/testing/selftests/safesetid/safesetid-test.c2
-rw-r--r--tools/testing/selftests/sched/.gitignore1
-rw-r--r--tools/testing/selftests/sched/Makefile14
-rw-r--r--tools/testing/selftests/sched/config1
-rw-r--r--tools/testing/selftests/sched/cs_prctl_test.c336
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c10
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c51
-rw-r--r--tools/testing/selftests/sgx/call.S6
-rw-r--r--tools/testing/selftests/sgx/defines.h10
-rw-r--r--tools/testing/selftests/sgx/load.c19
-rw-r--r--tools/testing/selftests/sgx/main.c239
-rw-r--r--tools/testing/selftests/sgx/main.h4
-rw-r--r--tools/testing/selftests/sgx/sigstruct.c41
-rw-r--r--tools/testing/selftests/sgx/test_encl.c19
-rw-r--r--tools/testing/selftests/sgx/test_encl.lds3
-rw-r--r--tools/testing/selftests/sigaltstack/sas.c20
-rwxr-xr-xtools/testing/selftests/splice/short_splice_read.sh119
-rw-r--r--tools/testing/selftests/sync/config1
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py42
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ct.json45
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json24
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json28
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json137
-rw-r--r--tools/testing/selftests/tc-testing/tdc_config.py1
-rw-r--r--tools/testing/selftests/timers/rtcpie.c10
-rw-r--r--tools/testing/selftests/vm/.gitignore5
-rw-r--r--tools/testing/selftests/vm/Makefile11
-rw-r--r--tools/testing/selftests/vm/charge_reserved_hugetlb.sh5
-rw-r--r--tools/testing/selftests/vm/gup_test.c96
-rw-r--r--tools/testing/selftests/vm/hmm-tests.c158
-rw-r--r--tools/testing/selftests/vm/hugetlb_reparenting_test.sh5
-rw-r--r--tools/testing/selftests/vm/khugepaged.c4
-rw-r--r--tools/testing/selftests/vm/ksm_tests.c662
-rw-r--r--tools/testing/selftests/vm/madv_populate.c342
-rw-r--r--tools/testing/selftests/vm/memfd_secret.c296
-rw-r--r--tools/testing/selftests/vm/mlock-random-test.c2
-rw-r--r--tools/testing/selftests/vm/mremap_test.c118
-rw-r--r--tools/testing/selftests/vm/pkey-x86.h1
-rw-r--r--tools/testing/selftests/vm/protection_keys.c85
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests.sh129
-rw-r--r--tools/testing/selftests/vm/split_huge_page_test.c2
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c1086
-rw-r--r--tools/testing/selftests/x86/Makefile5
-rw-r--r--tools/testing/selftests/x86/corrupt_xstate_header.c114
-rw-r--r--tools/testing/selftests/x86/mov_ss_trap.c4
-rw-r--r--tools/testing/selftests/x86/sigaltstack.c128
-rw-r--r--tools/testing/selftests/x86/sigreturn.c7
-rw-r--r--tools/testing/selftests/x86/single_step_syscall.c4
-rw-r--r--tools/testing/selftests/x86/syscall_arg_fault.c7
-rw-r--r--tools/testing/selftests/x86/syscall_numbering.c491
510 files changed, 30482 insertions, 3670 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index bc3299a20338..c852eb40c4f7 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -35,9 +35,11 @@ TARGETS += memory-hotplug
TARGETS += mincore
TARGETS += mount
TARGETS += mount_setattr
+TARGETS += move_mount_set_group
TARGETS += mqueue
TARGETS += nci
TARGETS += net
+TARGETS += net/af_unix
TARGETS += net/forwarding
TARGETS += net/mptcp
TARGETS += netfilter
@@ -49,6 +51,7 @@ TARGETS += proc
TARGETS += pstore
TARGETS += ptrace
TARGETS += openat2
+TARGETS += rlimits
TARGETS += rseq
TARGETS += rtc
TARGETS += seccomp
diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
index d66f76d2a650..b67395903b9b 100644
--- a/tools/testing/selftests/arm64/fp/.gitignore
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -1,5 +1,7 @@
fpsimd-test
+rdvl-sve
sve-probe-vls
sve-ptrace
sve-test
+vec-syscfg
vlset
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index a57009d3a0dc..f2abdd6ba12e 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -1,17 +1,22 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -I../../../../../usr/include/
-TEST_GEN_PROGS := sve-ptrace sve-probe-vls
-TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress sve-test sve-stress vlset
+TEST_GEN_PROGS := sve-ptrace sve-probe-vls vec-syscfg
+TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress \
+ rdvl-sve \
+ sve-test sve-stress \
+ vlset
all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
fpsimd-test: fpsimd-test.o
$(CC) -nostdlib $^ -o $@
+rdvl-sve: rdvl-sve.o rdvl.o
sve-ptrace: sve-ptrace.o sve-ptrace-asm.o
-sve-probe-vls: sve-probe-vls.o
+sve-probe-vls: sve-probe-vls.o rdvl.o
sve-test: sve-test.o
$(CC) -nostdlib $^ -o $@
+vec-syscfg: vec-syscfg.o rdvl.o
vlset: vlset.o
include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/fp/TODO b/tools/testing/selftests/arm64/fp/TODO
new file mode 100644
index 000000000000..b6b7ebfcf362
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/TODO
@@ -0,0 +1,4 @@
+- Test unsupported values in the ABIs.
+- More coverage for ptrace (eg, vector length conversions).
+- Coverage for signals.
+- Test PR_SVE_VL_INHERITY after a double fork.
diff --git a/tools/testing/selftests/arm64/fp/rdvl-sve.c b/tools/testing/selftests/arm64/fp/rdvl-sve.c
new file mode 100644
index 000000000000..7f8a13a18f5d
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl-sve.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+
+#include "rdvl.h"
+
+int main(void)
+{
+ int vl = rdvl_sve();
+
+ printf("%d\n", vl);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/rdvl.S b/tools/testing/selftests/arm64/fp/rdvl.S
new file mode 100644
index 000000000000..c916c1c9defd
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl.S
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+
+.arch_extension sve
+
+.globl rdvl_sve
+rdvl_sve:
+ hint 34 // BTI C
+ rdvl x0, #1
+ ret
diff --git a/tools/testing/selftests/arm64/fp/rdvl.h b/tools/testing/selftests/arm64/fp/rdvl.h
new file mode 100644
index 000000000000..7c9d953fc9e7
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef RDVL_H
+#define RDVL_H
+
+int rdvl_sve(void);
+
+#endif
diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
index b29cbc642c57..a24eca7a4ecb 100644
--- a/tools/testing/selftests/arm64/fp/sve-probe-vls.c
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -13,6 +13,7 @@
#include <asm/sigcontext.h>
#include "../../kselftest.h"
+#include "rdvl.h"
int main(int argc, char **argv)
{
@@ -25,7 +26,7 @@ int main(int argc, char **argv)
ksft_set_plan(2);
if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
- ksft_exit_skip("SVE not available");
+ ksft_exit_skip("SVE not available\n");
/*
* Enumerate up to SVE_VQ_MAX vector lengths
@@ -38,6 +39,10 @@ int main(int argc, char **argv)
vl &= PR_SVE_VL_LEN_MASK;
+ if (rdvl_sve() != vl)
+ ksft_exit_fail_msg("PR_SVE_SET_VL reports %d, RDVL %d\n",
+ vl, rdvl_sve());
+
if (!sve_vl_valid(vl))
ksft_exit_fail_msg("VL %d invalid\n", vl);
vq = sve_vq_from_vl(vl);
diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
new file mode 100644
index 000000000000..c02071dcb563
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -0,0 +1,593 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ * Original author: Mark Brown <broonie@kernel.org>
+ */
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/hwcap.h>
+
+#include "../../kselftest.h"
+#include "rdvl.h"
+
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+
+#define ARCH_MIN_VL SVE_VL_MIN
+
+struct vec_data {
+ const char *name;
+ unsigned long hwcap_type;
+ unsigned long hwcap;
+ const char *rdvl_binary;
+ int (*rdvl)(void);
+
+ int prctl_get;
+ int prctl_set;
+ const char *default_vl_file;
+
+ int default_vl;
+ int min_vl;
+ int max_vl;
+};
+
+
+static struct vec_data vec_data[] = {
+ {
+ .name = "SVE",
+ .hwcap_type = AT_HWCAP,
+ .hwcap = HWCAP_SVE,
+ .rdvl = rdvl_sve,
+ .rdvl_binary = "./rdvl-sve",
+ .prctl_get = PR_SVE_GET_VL,
+ .prctl_set = PR_SVE_SET_VL,
+ .default_vl_file = "/proc/sys/abi/sve_default_vector_length",
+ },
+};
+
+static int stdio_read_integer(FILE *f, const char *what, int *val)
+{
+ int n = 0;
+ int ret;
+
+ ret = fscanf(f, "%d%*1[\n]%n", val, &n);
+ if (ret < 1 || n < 1) {
+ ksft_print_msg("failed to parse integer from %s\n", what);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Start a new process and return the vector length it sees */
+static int get_child_rdvl(struct vec_data *data)
+{
+ FILE *out;
+ int pipefd[2];
+ pid_t pid, child;
+ int read_vl, ret;
+
+ ret = pipe(pipefd);
+ if (ret == -1) {
+ ksft_print_msg("pipe() failed: %d (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ fflush(stdout);
+
+ child = fork();
+ if (child == -1) {
+ ksft_print_msg("fork() failed: %d (%s)\n",
+ errno, strerror(errno));
+ close(pipefd[0]);
+ close(pipefd[1]);
+ return -1;
+ }
+
+ /* Child: put vector length on the pipe */
+ if (child == 0) {
+ /*
+ * Replace stdout with the pipe, errors to stderr from
+ * here as kselftest prints to stdout.
+ */
+ ret = dup2(pipefd[1], 1);
+ if (ret == -1) {
+ fprintf(stderr, "dup2() %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ /* exec() a new binary which puts the VL on stdout */
+ ret = execl(data->rdvl_binary, data->rdvl_binary, NULL);
+ fprintf(stderr, "execl(%s) failed: %d\n",
+ data->rdvl_binary, errno, strerror(errno));
+
+ exit(EXIT_FAILURE);
+ }
+
+ close(pipefd[1]);
+
+ /* Parent; wait for the exit status from the child & verify it */
+ do {
+ pid = wait(&ret);
+ if (pid == -1) {
+ ksft_print_msg("wait() failed: %d (%s)\n",
+ errno, strerror(errno));
+ close(pipefd[0]);
+ return -1;
+ }
+ } while (pid != child);
+
+ assert(pid == child);
+
+ if (!WIFEXITED(ret)) {
+ ksft_print_msg("child exited abnormally\n");
+ close(pipefd[0]);
+ return -1;
+ }
+
+ if (WEXITSTATUS(ret) != 0) {
+ ksft_print_msg("child returned error %d\n",
+ WEXITSTATUS(ret));
+ close(pipefd[0]);
+ return -1;
+ }
+
+ out = fdopen(pipefd[0], "r");
+ if (!out) {
+ ksft_print_msg("failed to open child stdout\n");
+ close(pipefd[0]);
+ return -1;
+ }
+
+ ret = stdio_read_integer(out, "child", &read_vl);
+ fclose(out);
+ if (ret != 0)
+ return ret;
+
+ return read_vl;
+}
+
+static int file_read_integer(const char *name, int *val)
+{
+ FILE *f;
+ int ret;
+
+ f = fopen(name, "r");
+ if (!f) {
+ ksft_test_result_fail("Unable to open %s: %d (%s)\n",
+ name, errno,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = stdio_read_integer(f, name, val);
+ fclose(f);
+
+ return ret;
+}
+
+static int file_write_integer(const char *name, int val)
+{
+ FILE *f;
+ int ret;
+
+ f = fopen(name, "w");
+ if (!f) {
+ ksft_test_result_fail("Unable to open %s: %d (%s)\n",
+ name, errno,
+ strerror(errno));
+ return -1;
+ }
+
+ fprintf(f, "%d", val);
+ fclose(f);
+ if (ret < 0) {
+ ksft_test_result_fail("Error writing %d to %s\n",
+ val, name);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Verify that we can read the default VL via proc, checking that it
+ * is set in a freshly spawned child.
+ */
+static void proc_read_default(struct vec_data *data)
+{
+ int default_vl, child_vl, ret;
+
+ ret = file_read_integer(data->default_vl_file, &default_vl);
+ if (ret != 0)
+ return;
+
+ /* Is this the actual default seen by new processes? */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != default_vl) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ default_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s default vector length %d\n", data->name,
+ default_vl);
+ data->default_vl = default_vl;
+}
+
+/* Verify that we can write a minimum value and have it take effect */
+static void proc_write_min(struct vec_data *data)
+{
+ int ret, new_default, child_vl;
+
+ if (geteuid() != 0) {
+ ksft_test_result_skip("Need to be root to write to /proc\n");
+ return;
+ }
+
+ ret = file_write_integer(data->default_vl_file, ARCH_MIN_VL);
+ if (ret != 0)
+ return;
+
+ /* What was the new value? */
+ ret = file_read_integer(data->default_vl_file, &new_default);
+ if (ret != 0)
+ return;
+
+ /* Did it take effect in a new process? */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != new_default) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ new_default, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s minimum vector length %d\n", data->name,
+ new_default);
+ data->min_vl = new_default;
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* Verify that we can write a maximum value and have it take effect */
+static void proc_write_max(struct vec_data *data)
+{
+ int ret, new_default, child_vl;
+
+ if (geteuid() != 0) {
+ ksft_test_result_skip("Need to be root to write to /proc\n");
+ return;
+ }
+
+ /* -1 is accepted by the /proc interface as the maximum VL */
+ ret = file_write_integer(data->default_vl_file, -1);
+ if (ret != 0)
+ return;
+
+ /* What was the new value? */
+ ret = file_read_integer(data->default_vl_file, &new_default);
+ if (ret != 0)
+ return;
+
+ /* Did it take effect in a new process? */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != new_default) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ new_default, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s maximum vector length %d\n", data->name,
+ new_default);
+ data->max_vl = new_default;
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* Can we read back a VL from prctl? */
+static void prctl_get(struct vec_data *data)
+{
+ int ret;
+
+ ret = prctl(data->prctl_get);
+ if (ret == -1) {
+ ksft_test_result_fail("%s prctl() read failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+
+ /* Mask out any flags */
+ ret &= PR_SVE_VL_LEN_MASK;
+
+ /* Is that what we can read back directly? */
+ if (ret == data->rdvl())
+ ksft_test_result_pass("%s current VL is %d\n",
+ data->name, ret);
+ else
+ ksft_test_result_fail("%s prctl() VL %d but RDVL is %d\n",
+ data->name, ret, data->rdvl());
+}
+
+/* Does the prctl let us set the VL we already have? */
+static void prctl_set_same(struct vec_data *data)
+{
+ int cur_vl = data->rdvl();
+ int ret;
+
+ ret = prctl(data->prctl_set, cur_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+
+ if (cur_vl != data->rdvl())
+ ksft_test_result_pass("%s current VL is %d\n",
+ data->name, ret);
+ else
+ ksft_test_result_fail("%s prctl() VL %d but RDVL is %d\n",
+ data->name, ret, data->rdvl());
+}
+
+/* Can we set a new VL for this process? */
+static void prctl_set(struct vec_data *data)
+{
+ int ret;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ /* Try to set the minimum VL */
+ ret = prctl(data->prctl_set, data->min_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ if ((ret & PR_SVE_VL_LEN_MASK) != data->min_vl) {
+ ksft_test_result_fail("%s prctl set %d but return value is %d\n",
+ data->name, data->min_vl, data->rdvl());
+ return;
+ }
+
+ if (data->rdvl() != data->min_vl) {
+ ksft_test_result_fail("%s set %d but RDVL is %d\n",
+ data->name, data->min_vl, data->rdvl());
+ return;
+ }
+
+ /* Try to set the maximum VL */
+ ret = prctl(data->prctl_set, data->max_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->max_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ if ((ret & PR_SVE_VL_LEN_MASK) != data->max_vl) {
+ ksft_test_result_fail("%s prctl() set %d but return value is %d\n",
+ data->name, data->max_vl, data->rdvl());
+ return;
+ }
+
+ /* The _INHERIT flag should not be present when we read the VL */
+ ret = prctl(data->prctl_get);
+ if (ret == -1) {
+ ksft_test_result_fail("%s prctl() read failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+
+ if (ret & PR_SVE_VL_INHERIT) {
+ ksft_test_result_fail("%s prctl() reports _INHERIT\n",
+ data->name);
+ return;
+ }
+
+ ksft_test_result_pass("%s prctl() set min/max\n", data->name);
+}
+
+/* If we didn't request it a new VL shouldn't affect the child */
+static void prctl_set_no_child(struct vec_data *data)
+{
+ int ret, child_vl;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ ret = prctl(data->prctl_set, data->min_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* Ensure the default VL is different */
+ ret = file_write_integer(data->default_vl_file, data->max_vl);
+ if (ret != 0)
+ return;
+
+ /* Check that the child has the default we just set */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != data->max_vl) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ data->max_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s vector length used default\n", data->name);
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* If we didn't request it a new VL shouldn't affect the child */
+static void prctl_set_for_child(struct vec_data *data)
+{
+ int ret, child_vl;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ ret = prctl(data->prctl_set, data->min_vl | PR_SVE_VL_INHERIT);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* The _INHERIT flag should be present when we read the VL */
+ ret = prctl(data->prctl_get);
+ if (ret == -1) {
+ ksft_test_result_fail("%s prctl() read failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+ if (!(ret & PR_SVE_VL_INHERIT)) {
+ ksft_test_result_fail("%s prctl() does not report _INHERIT\n",
+ data->name);
+ return;
+ }
+
+ /* Ensure the default VL is different */
+ ret = file_write_integer(data->default_vl_file, data->max_vl);
+ if (ret != 0)
+ return;
+
+ /* Check that the child inherited our VL */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != data->min_vl) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ data->min_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s vector length was inherited\n", data->name);
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* _ONEXEC takes effect only in the child process */
+static void prctl_set_onexec(struct vec_data *data)
+{
+ int ret, child_vl;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ /* Set a known value for the default and our current VL */
+ ret = file_write_integer(data->default_vl_file, data->max_vl);
+ if (ret != 0)
+ return;
+
+ ret = prctl(data->prctl_set, data->max_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* Set a different value for the child to have on exec */
+ ret = prctl(data->prctl_set, data->min_vl | PR_SVE_SET_VL_ONEXEC);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* Our current VL should stay the same */
+ if (data->rdvl() != data->max_vl) {
+ ksft_test_result_fail("%s VL changed by _ONEXEC prctl()\n",
+ data->name);
+ return;
+ }
+
+ /* Check that the child inherited our VL */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != data->min_vl) {
+ ksft_test_result_fail("Set %d _ONEXEC but child VL is %d\n",
+ data->min_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s vector length set on exec\n", data->name);
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+typedef void (*test_type)(struct vec_data *);
+
+static const test_type tests[] = {
+ /*
+ * The default/min/max tests must be first and in this order
+ * to provide data for other tests.
+ */
+ proc_read_default,
+ proc_write_min,
+ proc_write_max,
+
+ prctl_get,
+ prctl_set,
+ prctl_set_no_child,
+ prctl_set_for_child,
+ prctl_set_onexec,
+};
+
+int main(void)
+{
+ int i, j;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests) * ARRAY_SIZE(vec_data));
+
+ for (i = 0; i < ARRAY_SIZE(vec_data); i++) {
+ struct vec_data *data = &vec_data[i];
+ unsigned long supported;
+
+ supported = getauxval(data->hwcap_type) & data->hwcap;
+
+ for (j = 0; j < ARRAY_SIZE(tests); j++) {
+ if (supported)
+ tests[j](data);
+ else
+ ksft_test_result_skip("%s not supported\n",
+ data->name);
+ }
+ }
+
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
index bc3ac63f3314..d1fe4ddf1669 100644
--- a/tools/testing/selftests/arm64/mte/.gitignore
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -1,4 +1,5 @@
check_buffer_fill
+check_gcr_el1_cswitch
check_tags_inclusion
check_child_memory
check_mmap_options
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
index f50ac31920d1..0328a1e08f65 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.c
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -298,7 +298,7 @@ int mte_default_setup(void)
int ret;
if (!(hwcaps2 & HWCAP2_MTE)) {
- ksft_print_msg("FAIL: MTE features unavailable\n");
+ ksft_print_msg("SKIP: MTE features unavailable\n");
return KSFT_SKIP;
}
/* Get current mte mode */
diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
index 592fe538506e..b743daa772f5 100644
--- a/tools/testing/selftests/arm64/pauth/pac.c
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -25,13 +25,15 @@
do { \
unsigned long hwcaps = getauxval(AT_HWCAP); \
/* data key instructions are not in NOP space. This prevents a SIGILL */ \
- ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \
+ if (!(hwcaps & HWCAP_PACA)) \
+ SKIP(return, "PAUTH not enabled"); \
} while (0)
#define ASSERT_GENERIC_PAUTH_ENABLED() \
do { \
unsigned long hwcaps = getauxval(AT_HWCAP); \
/* generic key instructions are not in NOP space. This prevents a SIGILL */ \
- ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \
+ if (!(hwcaps & HWCAP_PACG)) \
+ SKIP(return, "Generic PAUTH not enabled"); \
} while (0)
void sign_specific(struct signatures *sign, size_t val)
@@ -256,7 +258,7 @@ TEST(single_thread_different_keys)
unsigned long hwcaps = getauxval(AT_HWCAP);
/* generic and data key instructions are not in NOP space. This prevents a SIGILL */
- ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+ ASSERT_PAUTH_ENABLED();
if (!(hwcaps & HWCAP_PACG)) {
TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
nkeys = NKEYS - 1;
@@ -299,7 +301,7 @@ TEST(exec_changed_keys)
unsigned long hwcaps = getauxval(AT_HWCAP);
/* generic and data key instructions are not in NOP space. This prevents a SIGILL */
- ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+ ASSERT_PAUTH_ENABLED();
if (!(hwcaps & HWCAP_PACG)) {
TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
nkeys = NKEYS - 1;
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
index 78c902045ca7..c1742755abb9 100644
--- a/tools/testing/selftests/arm64/signal/.gitignore
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
mangle_*
fake_sigreturn_*
+sve_*
!*.[ch]
diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
index f96baf1cef1a..ebe8694dbef0 100644
--- a/tools/testing/selftests/arm64/signal/test_signals.h
+++ b/tools/testing/selftests/arm64/signal/test_signals.h
@@ -33,10 +33,12 @@
*/
enum {
FSSBS_BIT,
+ FSVE_BIT,
FMAX_END
};
#define FEAT_SSBS (1UL << FSSBS_BIT)
+#define FEAT_SVE (1UL << FSVE_BIT)
/*
* A descriptor used to describe and configure a test case.
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 2de6e5ed5e25..22722abc9dfa 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -26,6 +26,7 @@ static int sig_copyctx = SIGTRAP;
static char const *const feats_names[FMAX_END] = {
" SSBS ",
+ " SVE ",
};
#define MAX_FEATS_SZ 128
@@ -263,16 +264,21 @@ int test_init(struct tdescr *td)
*/
if (getauxval(AT_HWCAP) & HWCAP_SSBS)
td->feats_supported |= FEAT_SSBS;
- if (feats_ok(td))
+ if (getauxval(AT_HWCAP) & HWCAP_SVE)
+ td->feats_supported |= FEAT_SVE;
+ if (feats_ok(td)) {
fprintf(stderr,
"Required Features: [%s] supported\n",
feats_to_string(td->feats_required &
td->feats_supported));
- else
+ } else {
fprintf(stderr,
"Required Features: [%s] NOT supported\n",
feats_to_string(td->feats_required &
~td->feats_supported));
+ td->result = KSFT_SKIP;
+ return 0;
+ }
}
/* Perform test specific additional initialization */
diff --git a/tools/testing/selftests/arm64/signal/testcases/TODO b/tools/testing/selftests/arm64/signal/testcases/TODO
new file mode 100644
index 000000000000..110ff9fd195d
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/TODO
@@ -0,0 +1,2 @@
+- Validate that register contents are saved and restored as expected.
+- Support and validate extra_context.
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
new file mode 100644
index 000000000000..bb50b5adbf10
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Attempt to change the SVE vector length in a signal hander, this is not
+ * supported and is expected to segfault.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sve_get_vls(struct tdescr *td)
+{
+ int vq, vl;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ return false;
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ /* We need at least two VLs */
+ if (nvls < 2) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ return false;
+ }
+
+ return true;
+}
+
+static int fake_sigreturn_sve_change_vl(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct sve_context *sve;
+
+ /* Get a signal context with a SVE frame in it */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+
+ if (head->size != sizeof(struct sve_context)) {
+ fprintf(stderr, "SVE register state active, skipping\n");
+ return 1;
+ }
+
+ sve = (struct sve_context *)head;
+
+ /* No changes are supported; init left us at minimum VL so go to max */
+ fprintf(stderr, "Attempting to change VL from %d to %d\n",
+ sve->vl, vls[0]);
+ sve->vl = vls[0];
+
+ fake_sigreturn(&sf, sizeof(sf), 0);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_SVE_CHANGE",
+ .descr = "Attempt to change SVE VL",
+ .feats_required = FEAT_SVE,
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .init = sve_get_vls,
+ .run = fake_sigreturn_sve_change_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
new file mode 100644
index 000000000000..4b2418aa08a9
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the SVE register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sve_get_vls(struct tdescr *td)
+{
+ int vq, vl;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ return false;
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ /* We need at least one VL */
+ if (nvls < 1) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ return false;
+ }
+
+ return true;
+}
+
+static void setup_sve_regs(void)
+{
+ /* RDVL x16, #1 so we should have SVE regs; real data is TODO */
+ asm volatile(".inst 0x04bf5030" : : : "x16" );
+}
+
+static int do_one_sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+ unsigned int vl)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct sve_context *sve;
+
+ fprintf(stderr, "Testing VL %d\n", vl);
+
+ if (prctl(PR_SVE_SET_VL, vl) == -1) {
+ fprintf(stderr, "Failed to set VL\n");
+ return 1;
+ }
+
+ /*
+ * Get a signal context which should have a SVE frame and registers
+ * in it.
+ */
+ setup_sve_regs();
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+
+ sve = (struct sve_context *)head;
+ if (sve->vl != vl) {
+ fprintf(stderr, "Got VL %d, expected %d\n", sve->vl, vl);
+ return 1;
+ }
+
+ /* The actual size validation is done in get_current_context() */
+ fprintf(stderr, "Got expected size %u and VL %d\n",
+ head->size, sve->vl);
+
+ return 0;
+}
+
+static int sve_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ int i;
+
+ for (i = 0; i < nvls; i++) {
+ /*
+ * TODO: the signal test helpers can't currently cope
+ * with signal frames bigger than struct sigcontext,
+ * skip VLs that will trigger that.
+ */
+ if (vls[i] > 64)
+ continue;
+
+ if (do_one_sve_vl(td, si, uc, vls[i]))
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "SVE registers",
+ .descr = "Check that we get the right SVE registers reported",
+ .feats_required = FEAT_SVE,
+ .timeout = 3,
+ .init = sve_get_vls,
+ .run = sve_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_vl.c b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c
new file mode 100644
index 000000000000..92904653add1
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Check that the SVE vector length reported in signal contexts is the
+ * expected one.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+unsigned int vl;
+
+static bool get_sve_vl(struct tdescr *td)
+{
+ int ret = prctl(PR_SVE_GET_VL);
+ if (ret == -1)
+ return false;
+
+ vl = ret;
+
+ return true;
+}
+
+static int sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct sve_context *sve;
+
+ /* Get a signal context which should have a SVE frame in it */
+ if (!get_current_context(td, &sf.uc))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+ sve = (struct sve_context *)head;
+
+ if (sve->vl != vl) {
+ fprintf(stderr, "sigframe VL %u, expected %u\n",
+ sve->vl, vl);
+ return 1;
+ } else {
+ fprintf(stderr, "got expected VL %u\n", vl);
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "SVE VL",
+ .descr = "Check that we get the right SVE VL reported",
+ .feats_required = FEAT_SVE,
+ .timeout = 3,
+ .init = get_sve_vl,
+ .run = sve_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
index 61ebcdf63831..8c2a57fc2f9c 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.c
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
@@ -50,12 +50,38 @@ bool validate_extra_context(struct extra_context *extra, char **err)
return true;
}
+bool validate_sve_context(struct sve_context *sve, char **err)
+{
+ /* Size will be rounded up to a multiple of 16 bytes */
+ size_t regs_size
+ = ((SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl)) + 15) / 16) * 16;
+
+ if (!sve || !err)
+ return false;
+
+ /* Either a bare sve_context or a sve_context followed by regs data */
+ if ((sve->head.size != sizeof(struct sve_context)) &&
+ (sve->head.size != regs_size)) {
+ *err = "bad size for SVE context";
+ return false;
+ }
+
+ if (!sve_vl_valid(sve->vl)) {
+ *err = "SVE VL invalid";
+
+ return false;
+ }
+
+ return true;
+}
+
bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
{
bool terminated = false;
size_t offs = 0;
int flags = 0;
struct extra_context *extra = NULL;
+ struct sve_context *sve = NULL;
struct _aarch64_ctx *head =
(struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
@@ -90,9 +116,8 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
case SVE_MAGIC:
if (flags & SVE_CTX)
*err = "Multiple SVE_MAGIC";
- else if (head->size !=
- sizeof(struct sve_context))
- *err = "Bad size for sve_context";
+ /* Size is validated in validate_sve_context() */
+ sve = (struct sve_context *)head;
flags |= SVE_CTX;
break;
case EXTRA_MAGIC:
@@ -137,6 +162,9 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
if (flags & EXTRA_CTX)
if (!validate_extra_context(extra, err))
return false;
+ if (flags & SVE_CTX)
+ if (!validate_sve_context(sve, err))
+ return false;
head = GET_RESV_NEXT_HEAD(head);
}
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 4866f6a21901..433f8bef261e 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -10,6 +10,7 @@ FEATURE-DUMP.libbpf
fixdep
test_dev_cgroup
/test_progs*
+!test_progs.h
test_verifier_log
feature
test_sock
@@ -22,7 +23,6 @@ test_skb_cgroup_id_user
test_cgroup_storage
test_flow_dissector
flow_dissector_load
-test_netcnt
test_tcpnotify_user
test_libbpf
test_tcp_check_syncookie_user
@@ -30,10 +30,13 @@ test_sysctl
xdping
test_cpp
*.skel.h
+*.lskel.h
/no_alu32
/bpf_gcc
/tools
/runqslower
/bench
*.ko
+*.tmp
xdpxceiver
+xdp_redirect_multi
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 511259c2c6c5..799b88152e9e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -38,7 +38,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_verifier_log test_dev_cgroup \
test_sock test_sockmap get_cgroup_id_user \
test_cgroup_storage \
- test_netcnt test_tcpnotify_user test_sysctl \
+ test_tcpnotify_user test_sysctl \
test_progs-no_alu32
# Also test bpf-gcc, if present
@@ -54,6 +54,7 @@ TEST_FILES = xsk_prereqs.sh \
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
test_xdp_redirect.sh \
+ test_xdp_redirect_multi.sh \
test_xdp_meta.sh \
test_xdp_veth.sh \
test_offload.py \
@@ -78,13 +79,13 @@ TEST_PROGS := test_kmod.sh \
TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \
- test_xdp_vlan.sh
+ test_xdp_vlan.sh test_bpftool.py
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
- xdpxceiver
+ xdpxceiver xdp_redirect_multi
TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
@@ -186,6 +187,8 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \
cp $(SCRATCH_DIR)/runqslower $@
+TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL)
+
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
@@ -196,7 +199,6 @@ $(OUTPUT)/test_sockmap: cgroup_helpers.c
$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
-$(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c
@@ -312,6 +314,10 @@ SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
linked_vars.skel.h linked_maps.skel.h
+LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
+ test_ksyms_module.c test_ringbuf.c atomics.c trace_printk.c
+SKEL_BLACKLIST += $$(LSKELS)
+
test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o
linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o
@@ -339,6 +345,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)
TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
$$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\
$$(TRUNNER_BPF_SRCS)))
+TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS))
TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
@@ -368,7 +375,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_BPF_PROGS_DIR)/%.c \
$(TRUNNER_BPF_PROGS_DIR)/*.h \
$$(INCLUDE_DIR)/vmlinux.h \
- $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT)
+ $(wildcard $(BPFDIR)/bpf_*.h) \
+ | $(TRUNNER_OUTPUT) $$(BPFOBJ)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS))
@@ -380,6 +388,14 @@ $(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
$(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@
+$(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$<
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o)
+ $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
+ $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@
+
$(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o))
$(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps))
@@ -409,6 +425,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_BPF_OBJS) \
$(TRUNNER_BPF_SKELS) \
+ $(TRUNNER_BPF_LSKELS) \
$(TRUNNER_BPF_SKELS_LINKED) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
@@ -516,6 +533,6 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature \
- $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko)
+ $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h no_alu32 bpf_gcc bpf_testmod.ko)
.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
index ccf260021e83..eb6a4fea8c79 100644
--- a/tools/testing/selftests/bpf/Makefile.docs
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -52,7 +52,8 @@ $(OUTPUT)%.$2: $(OUTPUT)%.rst
ifndef RST2MAN_DEP
$$(error "rst2man not found, but required to generate man pages")
endif
- $$(QUIET_GEN)rst2man $$< > $$@
+ $$(QUIET_GEN)rst2man --exit-status=1 $$< > $$@.tmp
+ $$(QUIET_GEN)mv $$@.tmp $$@
docs-clean-$1:
$$(call QUIET_CLEAN, eBPF_$1-manpage)
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 3353778c30f8..9b17f2867488 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -19,6 +19,13 @@ the CI. It builds the kernel (without overwriting your existing Kconfig), recomp
bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and
saves the resulting output (by default in ``~/.bpf_selftests``).
+Script dependencies:
+- clang (preferably built from sources, https://github.com/llvm/llvm-project);
+- pahole (preferably built from sources, https://git.kernel.org/pub/scm/devel/pahole/pahole.git/);
+- qemu;
+- docutils (for ``rst2man``);
+- libcap-devel.
+
For more information on about using the script, run:
.. code-block:: console
@@ -202,3 +209,22 @@ generate valid BTF information for weak variables. Please make sure you use
Clang that contains the fix.
__ https://reviews.llvm.org/D100362
+
+Clang relocation changes
+========================
+
+Clang 13 patch `clang reloc patch`_ made some changes on relocations such
+that existing relocation types are broken into more types and
+each new type corresponds to only one way to resolve relocation.
+See `kernel llvm reloc`_ for more explanation and some examples.
+Using clang 13 to compile old libbpf which has static linker support,
+there will be a compilation failure::
+
+ libbpf: ELF relo #0 in section #6 has unexpected type 2 in .../bpf_tcp_nogpl.o
+
+Here, ``type 2`` refers to new relocation type ``R_BPF_64_ABS64``.
+To fix this issue, user newer libbpf.
+
+.. Links
+.. _clang reloc patch: https://reviews.llvm.org/D102712
+.. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 332ed2f7b402..6ea15b93a2f8 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -43,6 +43,7 @@ void setup_libbpf()
{
int err;
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
libbpf_set_print(libbpf_print_fn);
err = bump_memlock_rlimit();
diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c
index a967674098ad..c7ec114eca56 100644
--- a/tools/testing/selftests/bpf/benchs/bench_rename.c
+++ b/tools/testing/selftests/bpf/benchs/bench_rename.c
@@ -65,7 +65,7 @@ static void attach_bpf(struct bpf_program *prog)
struct bpf_link *link;
link = bpf_program__attach(prog);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index bde6c9d4cbd4..d167bffac679 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -181,7 +181,7 @@ static void ringbuf_libbpf_setup()
}
link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
@@ -271,7 +271,7 @@ static void ringbuf_custom_setup()
}
link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program\n");
exit(1);
}
@@ -430,7 +430,7 @@ static void perfbuf_libbpf_setup()
}
link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program\n");
exit(1);
}
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 2a0b6c9885a4..f41a491a8cc0 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -60,7 +60,7 @@ static void attach_bpf(struct bpf_program *prog)
struct bpf_link *link;
link = bpf_program__attach(prog);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 029589c008c9..b1ede6f0b821 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -12,6 +12,10 @@
SEC("struct_ops/"#name) \
BPF_PROG(name, args)
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
#define tcp_jiffies32 ((__u32)bpf_jiffies64())
struct sock_common {
@@ -27,6 +31,7 @@ enum sk_pacing {
struct sock {
struct sock_common __sk_common;
+#define sk_state __sk_common.skc_state
unsigned long sk_pacing_rate;
__u32 sk_pacing_status; /* see enum sk_pacing */
} __attribute__((preserve_access_index));
@@ -203,6 +208,20 @@ static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
}
+static __always_inline bool tcp_cc_eq(const char *a, const char *b)
+{
+ int i;
+
+ for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+ if (a[i] != b[i])
+ return false;
+ if (!a[i])
+ break;
+ }
+
+ return true;
+}
+
extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 033051717ba5..f3daa44a8266 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -12,27 +12,36 @@
#include <unistd.h>
#include <ftw.h>
-
#include "cgroup_helpers.h"
/*
* To avoid relying on the system setup, when setup_cgroup_env is called
- * we create a new mount namespace, and cgroup namespace. The cgroup2
- * root is mounted at CGROUP_MOUNT_PATH
- *
- * Unfortunately, most people don't have cgroupv2 enabled at this point in time.
- * It's easier to create our own mount namespace and manage it ourselves.
+ * we create a new mount namespace, and cgroup namespace. The cgroupv2
+ * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't
+ * have cgroupv2 enabled at this point in time. It's easier to create our
+ * own mount namespace and manage it ourselves. We assume /mnt exists.
*
- * We assume /mnt exists.
+ * Related cgroupv1 helpers are named *classid*(), since we only use the
+ * net_cls controller for tagging net_cls.classid. We assume the default
+ * mount under /sys/fs/cgroup/net_cls, which should be the case for the
+ * vast majority of users.
*/
#define WALK_FD_LIMIT 16
+
#define CGROUP_MOUNT_PATH "/mnt"
+#define CGROUP_MOUNT_DFLT "/sys/fs/cgroup"
+#define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls"
#define CGROUP_WORK_DIR "/cgroup-test-work-dir"
+
#define format_cgroup_path(buf, path) \
snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
CGROUP_WORK_DIR, path)
+#define format_classid_path(buf) \
+ snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \
+ CGROUP_WORK_DIR)
+
/**
* enable_all_controllers() - Enable all available cgroup v2 controllers
*
@@ -139,8 +148,7 @@ static int nftwfunc(const char *filename, const struct stat *statptr,
return 0;
}
-
-static int join_cgroup_from_top(char *cgroup_path)
+static int join_cgroup_from_top(const char *cgroup_path)
{
char cgroup_procs_path[PATH_MAX + 1];
pid_t pid = getpid();
@@ -313,3 +321,114 @@ int cgroup_setup_and_join(const char *path) {
}
return cg_fd;
}
+
+/**
+ * setup_classid_environment() - Setup the cgroupv1 net_cls environment
+ *
+ * After calling this function, cleanup_classid_environment should be called
+ * once testing is complete.
+ *
+ * This function will print an error to stderr and return 1 if it is unable
+ * to setup the cgroup environment. If setup is successful, 0 is returned.
+ */
+int setup_classid_environment(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+
+ if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
+ errno != EBUSY) {
+ log_err("mount cgroup base");
+ return 1;
+ }
+
+ if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
+ log_err("mkdir cgroup net_cls");
+ return 1;
+ }
+
+ if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") &&
+ errno != EBUSY) {
+ log_err("mount cgroup net_cls");
+ return 1;
+ }
+
+ cleanup_classid_environment();
+
+ if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
+ log_err("mkdir cgroup work dir");
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * set_classid() - Set a cgroupv1 net_cls classid
+ * @id: the numeric classid
+ *
+ * Writes the passed classid into the cgroup work dir's net_cls.classid
+ * file in order to later on trigger socket tagging.
+ *
+ * On success, it returns 0, otherwise on failure it returns 1. If there
+ * is a failure, it prints the error to stderr.
+ */
+int set_classid(unsigned int id)
+{
+ char cgroup_workdir[PATH_MAX - 42];
+ char cgroup_classid_path[PATH_MAX + 1];
+ int fd, rc = 0;
+
+ format_classid_path(cgroup_workdir);
+ snprintf(cgroup_classid_path, sizeof(cgroup_classid_path),
+ "%s/net_cls.classid", cgroup_workdir);
+
+ fd = open(cgroup_classid_path, O_WRONLY);
+ if (fd < 0) {
+ log_err("Opening cgroup classid: %s", cgroup_classid_path);
+ return 1;
+ }
+
+ if (dprintf(fd, "%u\n", id) < 0) {
+ log_err("Setting cgroup classid");
+ rc = 1;
+ }
+
+ close(fd);
+ return rc;
+}
+
+/**
+ * join_classid() - Join a cgroupv1 net_cls classid
+ *
+ * This function expects the cgroup work dir to be already created, as we
+ * join it here. This causes the process sockets to be tagged with the given
+ * net_cls classid.
+ *
+ * On success, it returns 0, otherwise on failure it returns 1.
+ */
+int join_classid(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+ return join_cgroup_from_top(cgroup_workdir);
+}
+
+/**
+ * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
+ *
+ * At call time, it moves the calling process to the root cgroup, and then
+ * runs the deletion process.
+ *
+ * On failure, it will print an error to stderr, and try to continue.
+ */
+void cleanup_classid_environment(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+ join_cgroup_from_top(NETCLS_MOUNT_PATH);
+ nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 5fe3d88e4f0d..629da3854b3e 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __CGROUP_HELPERS_H
#define __CGROUP_HELPERS_H
+
#include <errno.h>
#include <string.h>
@@ -8,12 +9,21 @@
#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
-
+/* cgroupv2 related */
int cgroup_setup_and_join(const char *path);
int create_and_get_cgroup(const char *path);
+unsigned long long get_cgroup_id(const char *path);
+
int join_cgroup(const char *path);
+
int setup_cgroup_environment(void);
void cleanup_cgroup_environment(void);
-unsigned long long get_cgroup_id(const char *path);
-#endif
+/* cgroupv1 related */
+int set_classid(unsigned int id);
+int join_classid(void);
+
+int setup_classid_environment(void);
+void cleanup_classid_environment(void);
+
+#endif /* __CGROUP_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/netcnt_common.h b/tools/testing/selftests/bpf/netcnt_common.h
index 81084c1c2c23..0ab1c88041cd 100644
--- a/tools/testing/selftests/bpf/netcnt_common.h
+++ b/tools/testing/selftests/bpf/netcnt_common.h
@@ -6,19 +6,39 @@
#define MAX_PERCPU_PACKETS 32
-struct percpu_net_cnt {
- __u64 packets;
- __u64 bytes;
+/* sizeof(struct bpf_local_storage_elem):
+ *
+ * It really is about 128 bytes on x86_64, but allocate more to account for
+ * possible layout changes, different architectures, etc.
+ * The kernel will wrap up to PAGE_SIZE internally anyway.
+ */
+#define SIZEOF_BPF_LOCAL_STORAGE_ELEM 256
- __u64 prev_ts;
+/* Try to estimate kernel's BPF_LOCAL_STORAGE_MAX_VALUE_SIZE: */
+#define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE (0xFFFF - \
+ SIZEOF_BPF_LOCAL_STORAGE_ELEM)
- __u64 prev_packets;
- __u64 prev_bytes;
+#define PCPU_MIN_UNIT_SIZE 32768
+
+union percpu_net_cnt {
+ struct {
+ __u64 packets;
+ __u64 bytes;
+
+ __u64 prev_ts;
+
+ __u64 prev_packets;
+ __u64 prev_bytes;
+ };
+ __u8 data[PCPU_MIN_UNIT_SIZE];
};
-struct net_cnt {
- __u64 packets;
- __u64 bytes;
+union net_cnt {
+ struct {
+ __u64 packets;
+ __u64 bytes;
+ };
+ __u8 data[BPF_LOCAL_STORAGE_MAX_VALUE_SIZE];
};
#endif
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 2060bc122c53..6db1af8fdee7 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -66,17 +66,13 @@ int settimeo(int fd, int timeout_ms)
#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
-int start_server(int family, int type, const char *addr_str, __u16 port,
- int timeout_ms)
+static int __start_server(int type, const struct sockaddr *addr,
+ socklen_t addrlen, int timeout_ms, bool reuseport)
{
- struct sockaddr_storage addr = {};
- socklen_t len;
+ int on = 1;
int fd;
- if (make_sockaddr(family, addr_str, port, &addr, &len))
- return -1;
-
- fd = socket(family, type, 0);
+ fd = socket(addr->sa_family, type, 0);
if (fd < 0) {
log_err("Failed to create server socket");
return -1;
@@ -85,7 +81,13 @@ int start_server(int family, int type, const char *addr_str, __u16 port,
if (settimeo(fd, timeout_ms))
goto error_close;
- if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
+ if (reuseport &&
+ setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) {
+ log_err("Failed to set SO_REUSEPORT");
+ return -1;
+ }
+
+ if (bind(fd, addr, addrlen) < 0) {
log_err("Failed to bind socket");
goto error_close;
}
@@ -104,6 +106,69 @@ error_close:
return -1;
}
+int start_server(int family, int type, const char *addr_str, __u16 port,
+ int timeout_ms)
+{
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ return -1;
+
+ return __start_server(type, (struct sockaddr *)&addr,
+ addrlen, timeout_ms, false);
+}
+
+int *start_reuseport_server(int family, int type, const char *addr_str,
+ __u16 port, int timeout_ms, unsigned int nr_listens)
+{
+ struct sockaddr_storage addr;
+ unsigned int nr_fds = 0;
+ socklen_t addrlen;
+ int *fds;
+
+ if (!nr_listens)
+ return NULL;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ return NULL;
+
+ fds = malloc(sizeof(*fds) * nr_listens);
+ if (!fds)
+ return NULL;
+
+ fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen,
+ timeout_ms, true);
+ if (fds[0] == -1)
+ goto close_fds;
+ nr_fds = 1;
+
+ if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen))
+ goto close_fds;
+
+ for (; nr_fds < nr_listens; nr_fds++) {
+ fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr,
+ addrlen, timeout_ms, true);
+ if (fds[nr_fds] == -1)
+ goto close_fds;
+ }
+
+ return fds;
+
+close_fds:
+ free_fds(fds, nr_fds);
+ return NULL;
+}
+
+void free_fds(int *fds, unsigned int nr_close_fds)
+{
+ if (fds) {
+ while (nr_close_fds)
+ close(fds[--nr_close_fds]);
+ free(fds);
+ }
+}
+
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int timeout_ms)
{
@@ -143,23 +208,43 @@ error_close:
static int connect_fd_to_addr(int fd,
const struct sockaddr_storage *addr,
- socklen_t addrlen)
+ socklen_t addrlen, const bool must_fail)
{
- if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
- log_err("Failed to connect to server");
- return -1;
+ int ret;
+
+ errno = 0;
+ ret = connect(fd, (const struct sockaddr *)addr, addrlen);
+ if (must_fail) {
+ if (!ret) {
+ log_err("Unexpected success to connect to server");
+ return -1;
+ }
+ if (errno != EPERM) {
+ log_err("Unexpected error from connect to server");
+ return -1;
+ }
+ } else {
+ if (ret) {
+ log_err("Failed to connect to server");
+ return -1;
+ }
}
return 0;
}
-int connect_to_fd(int server_fd, int timeout_ms)
+static const struct network_helper_opts default_opts;
+
+int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
{
struct sockaddr_storage addr;
struct sockaddr_in *addr_in;
socklen_t addrlen, optlen;
int fd, type;
+ if (!opts)
+ opts = &default_opts;
+
optlen = sizeof(type);
if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
log_err("getsockopt(SOL_TYPE)");
@@ -179,10 +264,15 @@ int connect_to_fd(int server_fd, int timeout_ms)
return -1;
}
- if (settimeo(fd, timeout_ms))
+ if (settimeo(fd, opts->timeout_ms))
+ goto error_close;
+
+ if (opts->cc && opts->cc[0] &&
+ setsockopt(fd, SOL_TCP, TCP_CONGESTION, opts->cc,
+ strlen(opts->cc) + 1))
goto error_close;
- if (connect_fd_to_addr(fd, &addr, addrlen))
+ if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail))
goto error_close;
return fd;
@@ -192,6 +282,15 @@ error_close:
return -1;
}
+int connect_to_fd(int server_fd, int timeout_ms)
+{
+ struct network_helper_opts opts = {
+ .timeout_ms = timeout_ms,
+ };
+
+ return connect_to_fd_opts(server_fd, &opts);
+}
+
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
{
struct sockaddr_storage addr;
@@ -205,7 +304,7 @@ int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
return -1;
}
- if (connect_fd_to_addr(client_fd, &addr, len))
+ if (connect_fd_to_addr(client_fd, &addr, len, false))
return -1;
return 0;
@@ -217,6 +316,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
if (family == AF_INET) {
struct sockaddr_in *sin = (void *)addr;
+ memset(addr, 0, sizeof(*sin));
sin->sin_family = AF_INET;
sin->sin_port = htons(port);
if (addr_str &&
@@ -230,6 +330,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
} else if (family == AF_INET6) {
struct sockaddr_in6 *sin6 = (void *)addr;
+ memset(addr, 0, sizeof(*sin6));
sin6->sin6_family = AF_INET6;
sin6->sin6_port = htons(port);
if (addr_str &&
@@ -243,3 +344,15 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
}
return -1;
}
+
+char *ping_command(int family)
+{
+ if (family == AF_INET6) {
+ /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */
+ if (!system("which ping6 >/dev/null 2>&1"))
+ return "ping6";
+ else
+ return "ping -6";
+ }
+ return "ping";
+}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 5e0d51c07b63..d198181a5648 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -17,6 +17,12 @@ typedef __u16 __sum16;
#define VIP_NUM 5
#define MAGIC_BYTES 123
+struct network_helper_opts {
+ const char *cc;
+ int timeout_ms;
+ bool must_fail;
+};
+
/* ipv4 test vector */
struct ipv4_packet {
struct ethhdr eth;
@@ -36,11 +42,17 @@ extern struct ipv6_packet pkt_v6;
int settimeo(int fd, int timeout_ms);
int start_server(int family, int type, const char *addr, __u16 port,
int timeout_ms);
+int *start_reuseport_server(int family, int type, const char *addr_str,
+ __u16 port, int timeout_ms,
+ unsigned int nr_listens);
+void free_fds(int *fds, unsigned int nr_close_fds);
int connect_to_fd(int server_fd, int timeout_ms);
+int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts);
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int timeout_ms);
int make_sockaddr(int family, const char *addr_str, __u16 port,
struct sockaddr_storage *addr, socklen_t *len);
+char *ping_command(int family);
#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
index 21efe7bbf10d..ba0e1efe5a45 100644
--- a/tools/testing/selftests/bpf/prog_tests/atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -2,19 +2,19 @@
#include <test_progs.h>
-#include "atomics.skel.h"
+#include "atomics.lskel.h"
static void test_add(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.add);
- if (CHECK(IS_ERR(link), "attach(add)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__add__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(add)"))
return;
- prog_fd = bpf_program__fd(skel->progs.add);
+ prog_fd = skel->progs.add.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run add",
@@ -33,20 +33,20 @@ static void test_add(struct atomics *skel)
ASSERT_EQ(skel->data->add_noreturn_value, 3, "add_noreturn_value");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_sub(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.sub);
- if (CHECK(IS_ERR(link), "attach(sub)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__sub__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(sub)"))
return;
- prog_fd = bpf_program__fd(skel->progs.sub);
+ prog_fd = skel->progs.sub.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run sub",
@@ -66,20 +66,20 @@ static void test_sub(struct atomics *skel)
ASSERT_EQ(skel->data->sub_noreturn_value, -1, "sub_noreturn_value");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_and(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.and);
- if (CHECK(IS_ERR(link), "attach(and)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__and__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(and)"))
return;
- prog_fd = bpf_program__fd(skel->progs.and);
+ prog_fd = skel->progs.and.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run and",
@@ -94,20 +94,20 @@ static void test_and(struct atomics *skel)
ASSERT_EQ(skel->data->and_noreturn_value, 0x010ull << 32, "and_noreturn_value");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_or(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.or);
- if (CHECK(IS_ERR(link), "attach(or)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__or__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(or)"))
return;
- prog_fd = bpf_program__fd(skel->progs.or);
+ prog_fd = skel->progs.or.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run or",
@@ -123,20 +123,20 @@ static void test_or(struct atomics *skel)
ASSERT_EQ(skel->data->or_noreturn_value, 0x111ull << 32, "or_noreturn_value");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_xor(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.xor);
- if (CHECK(IS_ERR(link), "attach(xor)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__xor__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(xor)"))
return;
- prog_fd = bpf_program__fd(skel->progs.xor);
+ prog_fd = skel->progs.xor.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run xor",
@@ -151,20 +151,20 @@ static void test_xor(struct atomics *skel)
ASSERT_EQ(skel->data->xor_noreturn_value, 0x101ull << 32, "xor_nxoreturn_value");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_cmpxchg(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.cmpxchg);
- if (CHECK(IS_ERR(link), "attach(cmpxchg)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__cmpxchg__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)"))
return;
- prog_fd = bpf_program__fd(skel->progs.cmpxchg);
+ prog_fd = skel->progs.cmpxchg.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run add",
@@ -180,20 +180,20 @@ static void test_cmpxchg(struct atomics *skel)
ASSERT_EQ(skel->bss->cmpxchg32_result_succeed, 1, "cmpxchg_result_succeed");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
static void test_xchg(struct atomics *skel)
{
int err, prog_fd;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
- link = bpf_program__attach(skel->progs.xchg);
- if (CHECK(IS_ERR(link), "attach(xchg)", "err: %ld\n", PTR_ERR(link)))
+ link_fd = atomics__xchg__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(xchg)"))
return;
- prog_fd = bpf_program__fd(skel->progs.xchg);
+ prog_fd = skel->progs.xchg.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
if (CHECK(err || retval, "test_run add",
@@ -207,7 +207,7 @@ static void test_xchg(struct atomics *skel)
ASSERT_EQ(skel->bss->xchg32_result, 1, "xchg32_result");
cleanup:
- bpf_link__destroy(link);
+ close(link_fd);
}
void test_atomics(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 9dc4e3dfbcf3..bf307bb9e446 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -2,79 +2,28 @@
#include <test_progs.h>
#include "test_attach_probe.skel.h"
-#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
-
-#define OP_RT_RA_MASK 0xffff0000UL
-#define LIS_R2 0x3c400000UL
-#define ADDIS_R2_R12 0x3c4c0000UL
-#define ADDI_R2_R2 0x38420000UL
-
-static ssize_t get_offset(ssize_t addr, ssize_t base)
-{
- u32 *insn = (u32 *) addr;
-
- /*
- * A PPC64 ABIv2 function may have a local and a global entry
- * point. We need to use the local entry point when patching
- * functions, so identify and step over the global entry point
- * sequence.
- *
- * The global entry point sequence is always of the form:
- *
- * addis r2,r12,XXXX
- * addi r2,r2,XXXX
- *
- * A linker optimisation may convert the addis to lis:
- *
- * lis r2,XXXX
- * addi r2,r2,XXXX
- */
- if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
- ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
- ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
- return (ssize_t)(insn + 2) - base;
- else
- return addr - base;
-}
-#else
-#define get_offset(addr, base) (addr - base)
-#endif
-
-ssize_t get_base_addr() {
- size_t start, offset;
- char buf[256];
- FILE *f;
-
- f = fopen("/proc/self/maps", "r");
- if (!f)
- return -errno;
-
- while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
- &start, buf, &offset) == 3) {
- if (strcmp(buf, "r-xp") == 0) {
- fclose(f);
- return start - offset;
- }
- }
-
- fclose(f);
- return -EINVAL;
-}
+/* this is how USDT semaphore is actually defined, except volatile modifier */
+volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
void test_attach_probe(void)
{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
int duration = 0;
struct bpf_link *kprobe_link, *kretprobe_link;
struct bpf_link *uprobe_link, *uretprobe_link;
struct test_attach_probe* skel;
size_t uprobe_offset;
- ssize_t base_addr;
+ ssize_t base_addr, ref_ctr_offset;
base_addr = get_base_addr();
if (CHECK(base_addr < 0, "get_base_addr",
"failed to find base addr: %zd", base_addr))
return;
- uprobe_offset = get_offset((size_t)&get_base_addr, base_addr);
+ uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+
+ ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr);
+ if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
+ return;
skel = test_attach_probe__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
@@ -85,36 +34,40 @@ void test_attach_probe(void)
kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
false /* retprobe */,
SYS_NANOSLEEP_KPROBE_NAME);
- if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
- "err %ld\n", PTR_ERR(kprobe_link)))
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
goto cleanup;
skel->links.handle_kprobe = kprobe_link;
kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe,
true /* retprobe */,
SYS_NANOSLEEP_KPROBE_NAME);
- if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe",
- "err %ld\n", PTR_ERR(kretprobe_link)))
+ if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe"))
goto cleanup;
skel->links.handle_kretprobe = kretprobe_link;
- uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
- false /* retprobe */,
- 0 /* self pid */,
- "/proc/self/exe",
- uprobe_offset);
- if (CHECK(IS_ERR(uprobe_link), "attach_uprobe",
- "err %ld\n", PTR_ERR(uprobe_link)))
+ ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before");
+
+ uprobe_opts.retprobe = false;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
+ 0 /* self pid */,
+ "/proc/self/exe",
+ uprobe_offset,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
goto cleanup;
skel->links.handle_uprobe = uprobe_link;
- uretprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uretprobe,
- true /* retprobe */,
- -1 /* any pid */,
- "/proc/self/exe",
- uprobe_offset);
- if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe",
- "err %ld\n", PTR_ERR(uretprobe_link)))
+ ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after");
+
+ /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */
+ uprobe_opts.retprobe = true;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
+ -1 /* any pid */,
+ "/proc/self/exe",
+ uprobe_offset, &uprobe_opts);
+ if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe"))
goto cleanup;
skel->links.handle_uretprobe = uretprobe_link;
@@ -140,4 +93,5 @@ void test_attach_probe(void)
cleanup:
test_attach_probe__destroy(skel);
+ ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_cleanup");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
new file mode 100644
index 000000000000..5eea3c3a40fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <test_progs.h>
+#include "test_bpf_cookie.skel.h"
+
+static void kprobe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
+
+ /* attach two kprobes */
+ opts.bpf_cookie = 0x1;
+ opts.retprobe = false;
+ link1 = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x2;
+ opts.retprobe = false;
+ link2 = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* attach two kretprobes */
+ opts.bpf_cookie = 0x10;
+ opts.retprobe = true;
+ retlink1 = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(retlink1, "retlink1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x20;
+ opts.retprobe = true;
+ retlink2 = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(retlink2, "retlink2"))
+ goto cleanup;
+
+ /* trigger kprobe && kretprobe */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->kprobe_res, 0x1 | 0x2, "kprobe_res");
+ ASSERT_EQ(skel->bss->kretprobe_res, 0x10 | 0x20, "kretprobe_res");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(retlink1);
+ bpf_link__destroy(retlink2);
+}
+
+static void uprobe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
+ size_t uprobe_offset;
+ ssize_t base_addr;
+
+ base_addr = get_base_addr();
+ uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+
+ /* attach two uprobes */
+ opts.bpf_cookie = 0x100;
+ opts.retprobe = false;
+ link1 = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, 0 /* self pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x200;
+ opts.retprobe = false;
+ link2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, -1 /* any pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* attach two uretprobes */
+ opts.bpf_cookie = 0x1000;
+ opts.retprobe = true;
+ retlink1 = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, -1 /* any pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(retlink1, "retlink1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x2000;
+ opts.retprobe = true;
+ retlink2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, 0 /* self pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(retlink2, "retlink2"))
+ goto cleanup;
+
+ /* trigger uprobe && uretprobe */
+ get_base_addr();
+
+ ASSERT_EQ(skel->bss->uprobe_res, 0x100 | 0x200, "uprobe_res");
+ ASSERT_EQ(skel->bss->uretprobe_res, 0x1000 | 0x2000, "uretprobe_res");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(retlink1);
+ bpf_link__destroy(retlink2);
+}
+
+static void tp_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tracepoint_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL, *link3 = NULL;
+
+ /* attach first tp prog */
+ opts.bpf_cookie = 0x10000;
+ link1 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp1,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ /* attach second tp prog */
+ opts.bpf_cookie = 0x20000;
+ link2 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp2,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* trigger tracepoints */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->tp_res, 0x10000 | 0x20000, "tp_res1");
+
+ /* now we detach first prog and will attach third one, which causes
+ * two internal calls to bpf_prog_array_copy(), shuffling
+ * bpf_prog_array_items around. We test here that we don't lose track
+ * of associated bpf_cookies.
+ */
+ bpf_link__destroy(link1);
+ link1 = NULL;
+ kern_sync_rcu();
+ skel->bss->tp_res = 0;
+
+ /* attach third tp prog */
+ opts.bpf_cookie = 0x40000;
+ link3 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp3,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link3, "link3"))
+ goto cleanup;
+
+ /* trigger tracepoints */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->tp_res, 0x20000 | 0x40000, "tp_res2");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(link3);
+}
+
+static void burn_cpu(void)
+{
+ volatile int j = 0;
+ cpu_set_t cpu_set;
+ int i, err;
+
+ /* generate some branches on cpu 0 */
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ ASSERT_OK(err, "set_thread_affinity");
+
+ /* spin the loop for a while (random high number) */
+ for (i = 0; i < 1000000; ++i)
+ ++j;
+}
+
+static void pe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, opts);
+ struct bpf_link *link = NULL;
+ struct perf_event_attr attr;
+ int pfd = -1;
+
+ /* create perf event */
+ memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 4000;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (!ASSERT_GE(pfd, 0, "perf_fd"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x100000;
+ link = bpf_program__attach_perf_event_opts(skel->progs.handle_pe, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "link1"))
+ goto cleanup;
+
+ burn_cpu(); /* trigger BPF prog */
+
+ ASSERT_EQ(skel->bss->pe_res, 0x100000, "pe_res1");
+
+ /* prevent bpf_link__destroy() closing pfd itself */
+ bpf_link__disconnect(link);
+ /* close BPF link's FD explicitly */
+ close(bpf_link__fd(link));
+ /* free up memory used by struct bpf_link */
+ bpf_link__destroy(link);
+ link = NULL;
+ kern_sync_rcu();
+ skel->bss->pe_res = 0;
+
+ opts.bpf_cookie = 0x200000;
+ link = bpf_program__attach_perf_event_opts(skel->progs.handle_pe, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "link2"))
+ goto cleanup;
+
+ burn_cpu(); /* trigger BPF prog */
+
+ ASSERT_EQ(skel->bss->pe_res, 0x200000, "pe_res2");
+
+cleanup:
+ close(pfd);
+ bpf_link__destroy(link);
+}
+
+void test_bpf_cookie(void)
+{
+ struct test_bpf_cookie *skel;
+
+ skel = test_bpf_cookie__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_tid = syscall(SYS_gettid);
+
+ if (test__start_subtest("kprobe"))
+ kprobe_subtest(skel);
+ if (test__start_subtest("uprobe"))
+ uprobe_subtest(skel);
+ if (test__start_subtest("tracepoint"))
+ tp_subtest(skel);
+ if (test__start_subtest("perf_event"))
+ pe_subtest(skel);
+
+ test_bpf_cookie__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 2d3590cfb5e1..77ac24b191d4 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -13,6 +13,7 @@
#include "bpf_iter_tcp6.skel.h"
#include "bpf_iter_udp4.skel.h"
#include "bpf_iter_udp6.skel.h"
+#include "bpf_iter_unix.skel.h"
#include "bpf_iter_test_kern1.skel.h"
#include "bpf_iter_test_kern2.skel.h"
#include "bpf_iter_test_kern3.skel.h"
@@ -47,7 +48,7 @@ static void do_dummy_read(struct bpf_program *prog)
int iter_fd, len;
link = bpf_program__attach_iter(prog, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
return;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -201,7 +202,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
int ret = 0;
link = bpf_program__attach_iter(prog, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
return ret;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -313,6 +314,19 @@ static void test_udp6(void)
bpf_iter_udp6__destroy(skel);
}
+static void test_unix(void)
+{
+ struct bpf_iter_unix *skel;
+
+ skel = bpf_iter_unix__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_unix__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_unix);
+
+ bpf_iter_unix__destroy(skel);
+}
+
/* The expected string is less than 16 bytes */
static int do_read_with_fd(int iter_fd, const char *expected,
bool read_one_char)
@@ -396,7 +410,7 @@ static void test_file_iter(void)
return;
link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
/* unlink this path if it exists. */
@@ -502,7 +516,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
skel->bss->map2_id = map_info.id;
link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto free_map2;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -607,14 +621,12 @@ static void test_bpf_hash_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(!IS_ERR(link), "attach_iter",
- "attach_iter for hashmap2 unexpected succeeded\n"))
+ if (!ASSERT_ERR_PTR(link, "attach_iter"))
goto out;
linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(!IS_ERR(link), "attach_iter",
- "attach_iter for hashmap3 unexpected succeeded\n"))
+ if (!ASSERT_ERR_PTR(link, "attach_iter"))
goto out;
/* hashmap1 should be good, update map values here */
@@ -636,7 +648,7 @@ static void test_bpf_hash_map(void)
linfo.map.map_fd = map_fd;
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -727,7 +739,7 @@ static void test_bpf_percpu_hash_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -798,7 +810,7 @@ static void test_bpf_array_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -894,7 +906,7 @@ static void test_bpf_percpu_array_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -957,7 +969,7 @@ static void test_bpf_sk_storage_delete(void)
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.delete_bpf_sk_storage_map,
&opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -1075,7 +1087,7 @@ static void test_bpf_sk_storage_map(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -1128,7 +1140,7 @@ static void test_rdonly_buf_out_of_bound(void)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
- if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n"))
+ if (!ASSERT_ERR_PTR(link, "attach_iter"))
bpf_link__destroy(link);
bpf_iter_test_kern5__destroy(skel);
@@ -1186,8 +1198,7 @@ static void test_task_vma(void)
skel->links.proc_maps = bpf_program__attach_iter(
skel->progs.proc_maps, NULL);
- if (CHECK(IS_ERR(skel->links.proc_maps), "bpf_program__attach_iter",
- "attach iterator failed\n")) {
+ if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
skel->links.proc_maps = NULL;
goto out;
}
@@ -1258,6 +1269,8 @@ void test_bpf_iter(void)
test_udp4();
if (test__start_subtest("udp6"))
test_udp6();
+ if (test__start_subtest("unix"))
+ test_unix();
if (test__start_subtest("anon"))
test_anon_iter(false);
if (test__start_subtest("anon-read-one-char"))
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
new file mode 100644
index 000000000000..85babb0487b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "bpf_dctcp.skel.h"
+#include "bpf_cubic.skel.h"
+#include "bpf_iter_setsockopt.skel.h"
+
+static int create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
+ return -1;
+
+ return 0;
+}
+
+static unsigned int set_bpf_cubic(int *fds, unsigned int nr_fds)
+{
+ unsigned int i;
+
+ for (i = 0; i < nr_fds; i++) {
+ if (setsockopt(fds[i], SOL_TCP, TCP_CONGESTION, "bpf_cubic",
+ sizeof("bpf_cubic")))
+ return i;
+ }
+
+ return nr_fds;
+}
+
+static unsigned int check_bpf_dctcp(int *fds, unsigned int nr_fds)
+{
+ char tcp_cc[16];
+ socklen_t optlen = sizeof(tcp_cc);
+ unsigned int i;
+
+ for (i = 0; i < nr_fds; i++) {
+ if (getsockopt(fds[i], SOL_TCP, TCP_CONGESTION,
+ tcp_cc, &optlen) ||
+ strcmp(tcp_cc, "bpf_dctcp"))
+ return i;
+ }
+
+ return nr_fds;
+}
+
+static int *make_established(int listen_fd, unsigned int nr_est,
+ int **paccepted_fds)
+{
+ int *est_fds, *accepted_fds;
+ unsigned int i;
+
+ est_fds = malloc(sizeof(*est_fds) * nr_est);
+ if (!est_fds)
+ return NULL;
+
+ accepted_fds = malloc(sizeof(*accepted_fds) * nr_est);
+ if (!accepted_fds) {
+ free(est_fds);
+ return NULL;
+ }
+
+ for (i = 0; i < nr_est; i++) {
+ est_fds[i] = connect_to_fd(listen_fd, 0);
+ if (est_fds[i] == -1)
+ break;
+ if (set_bpf_cubic(&est_fds[i], 1) != 1) {
+ close(est_fds[i]);
+ break;
+ }
+
+ accepted_fds[i] = accept(listen_fd, NULL, 0);
+ if (accepted_fds[i] == -1) {
+ close(est_fds[i]);
+ break;
+ }
+ }
+
+ if (!ASSERT_EQ(i, nr_est, "create established fds")) {
+ free_fds(accepted_fds, i);
+ free_fds(est_fds, i);
+ return NULL;
+ }
+
+ *paccepted_fds = accepted_fds;
+ return est_fds;
+}
+
+static unsigned short get_local_port(int fd)
+{
+ struct sockaddr_in6 addr;
+ socklen_t addrlen = sizeof(addr);
+
+ if (!getsockname(fd, &addr, &addrlen))
+ return ntohs(addr.sin6_port);
+
+ return 0;
+}
+
+static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel,
+ bool random_retry)
+{
+ int *reuse_listen_fds = NULL, *accepted_fds = NULL, *est_fds = NULL;
+ unsigned int nr_reuse_listens = 256, nr_est = 256;
+ int err, iter_fd = -1, listen_fd = -1;
+ char buf;
+
+ /* Prepare non-reuseport listen_fd */
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(listen_fd, 0, "start_server"))
+ return;
+ if (!ASSERT_EQ(set_bpf_cubic(&listen_fd, 1), 1,
+ "set listen_fd to cubic"))
+ goto done;
+ iter_skel->bss->listen_hport = get_local_port(listen_fd);
+ if (!ASSERT_NEQ(iter_skel->bss->listen_hport, 0,
+ "get_local_port(listen_fd)"))
+ goto done;
+
+ /* Connect to non-reuseport listen_fd */
+ est_fds = make_established(listen_fd, nr_est, &accepted_fds);
+ if (!ASSERT_OK_PTR(est_fds, "create established"))
+ goto done;
+
+ /* Prepare reuseport listen fds */
+ reuse_listen_fds = start_reuseport_server(AF_INET6, SOCK_STREAM,
+ "::1", 0, 0,
+ nr_reuse_listens);
+ if (!ASSERT_OK_PTR(reuse_listen_fds, "start_reuseport_server"))
+ goto done;
+ if (!ASSERT_EQ(set_bpf_cubic(reuse_listen_fds, nr_reuse_listens),
+ nr_reuse_listens, "set reuse_listen_fds to cubic"))
+ goto done;
+ iter_skel->bss->reuse_listen_hport = get_local_port(reuse_listen_fds[0]);
+ if (!ASSERT_NEQ(iter_skel->bss->reuse_listen_hport, 0,
+ "get_local_port(reuse_listen_fds[0])"))
+ goto done;
+
+ /* Run bpf tcp iter to switch from bpf_cubic to bpf_dctcp */
+ iter_skel->bss->random_retry = random_retry;
+ iter_fd = bpf_iter_create(bpf_link__fd(iter_skel->links.change_tcp_cc));
+ if (!ASSERT_GE(iter_fd, 0, "create iter_fd"))
+ goto done;
+
+ while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+ errno == EAGAIN)
+ ;
+ if (!ASSERT_OK(err, "read iter error"))
+ goto done;
+
+ /* Check reuseport listen fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(reuse_listen_fds, nr_reuse_listens),
+ nr_reuse_listens,
+ "check reuse_listen_fds dctcp");
+
+ /* Check non reuseport listen fd for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(&listen_fd, 1), 1,
+ "check listen_fd dctcp");
+
+ /* Check established fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(est_fds, nr_est), nr_est,
+ "check est_fds dctcp");
+
+ /* Check accepted fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(accepted_fds, nr_est), nr_est,
+ "check accepted_fds dctcp");
+
+done:
+ if (iter_fd != -1)
+ close(iter_fd);
+ if (listen_fd != -1)
+ close(listen_fd);
+ free_fds(reuse_listen_fds, nr_reuse_listens);
+ free_fds(accepted_fds, nr_est);
+ free_fds(est_fds, nr_est);
+}
+
+void test_bpf_iter_setsockopt(void)
+{
+ struct bpf_iter_setsockopt *iter_skel = NULL;
+ struct bpf_cubic *cubic_skel = NULL;
+ struct bpf_dctcp *dctcp_skel = NULL;
+ struct bpf_link *cubic_link = NULL;
+ struct bpf_link *dctcp_link = NULL;
+
+ if (create_netns())
+ return;
+
+ /* Load iter_skel */
+ iter_skel = bpf_iter_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(iter_skel, "iter_skel"))
+ return;
+ iter_skel->links.change_tcp_cc = bpf_program__attach_iter(iter_skel->progs.change_tcp_cc, NULL);
+ if (!ASSERT_OK_PTR(iter_skel->links.change_tcp_cc, "attach iter"))
+ goto done;
+
+ /* Load bpf_cubic */
+ cubic_skel = bpf_cubic__open_and_load();
+ if (!ASSERT_OK_PTR(cubic_skel, "cubic_skel"))
+ goto done;
+ cubic_link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
+ if (!ASSERT_OK_PTR(cubic_link, "cubic_link"))
+ goto done;
+
+ /* Load bpf_dctcp */
+ dctcp_skel = bpf_dctcp__open_and_load();
+ if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
+ goto done;
+ dctcp_link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+ if (!ASSERT_OK_PTR(dctcp_link, "dctcp_link"))
+ goto done;
+
+ do_bpf_iter_setsockopt(iter_skel, true);
+ do_bpf_iter_setsockopt(iter_skel, false);
+
+done:
+ bpf_link__destroy(cubic_link);
+ bpf_link__destroy(dctcp_link);
+ bpf_cubic__destroy(cubic_skel);
+ bpf_dctcp__destroy(dctcp_skel);
+ bpf_iter_setsockopt__destroy(iter_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index e25917f04602..94e03df69d71 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -4,37 +4,22 @@
#include <linux/err.h>
#include <netinet/tcp.h>
#include <test_progs.h>
+#include "network_helpers.h"
#include "bpf_dctcp.skel.h"
#include "bpf_cubic.skel.h"
#include "bpf_tcp_nogpl.skel.h"
+#include "bpf_dctcp_release.skel.h"
#define min(a, b) ((a) < (b) ? (a) : (b))
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
static const unsigned int total_bytes = 10 * 1024 * 1024;
-static const struct timeval timeo_sec = { .tv_sec = 10 };
-static const size_t timeo_optlen = sizeof(timeo_sec);
static int expected_stg = 0xeB9F;
static int stop, duration;
-static int settimeo(int fd)
-{
- int err;
-
- err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
- timeo_optlen);
- if (CHECK(err == -1, "setsockopt(fd, SO_RCVTIMEO)", "errno:%d\n",
- errno))
- return -1;
-
- err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec,
- timeo_optlen);
- if (CHECK(err == -1, "setsockopt(fd, SO_SNDTIMEO)", "errno:%d\n",
- errno))
- return -1;
-
- return 0;
-}
-
static int settcpca(int fd, const char *tcp_ca)
{
int err;
@@ -61,7 +46,7 @@ static void *server(void *arg)
goto done;
}
- if (settimeo(fd)) {
+ if (settimeo(fd, 0)) {
err = -errno;
goto done;
}
@@ -82,7 +67,7 @@ static void *server(void *arg)
bytes, total_bytes, nr_sent, errno);
done:
- if (fd != -1)
+ if (fd >= 0)
close(fd);
if (err) {
WRITE_ONCE(stop, 1);
@@ -114,7 +99,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
}
if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) ||
- settimeo(lfd) || settimeo(fd))
+ settimeo(lfd, 0) || settimeo(fd, 0))
goto done;
/* bind, listen and start server thread to accept */
@@ -191,8 +176,7 @@ static void test_cubic(void)
return;
link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
- if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
- PTR_ERR(link))) {
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
bpf_cubic__destroy(cubic_skel);
return;
}
@@ -213,8 +197,7 @@ static void test_dctcp(void)
return;
link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
- if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
- PTR_ERR(link))) {
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
bpf_dctcp__destroy(dctcp_skel);
return;
}
@@ -269,6 +252,77 @@ static void test_invalid_license(void)
libbpf_set_print(old_print_fn);
}
+static void test_dctcp_fallback(void)
+{
+ int err, lfd = -1, cli_fd = -1, srv_fd = -1;
+ struct network_helper_opts opts = {
+ .cc = "cubic",
+ };
+ struct bpf_dctcp *dctcp_skel;
+ struct bpf_link *link = NULL;
+ char srv_cc[16];
+ socklen_t cc_len = sizeof(srv_cc);
+
+ dctcp_skel = bpf_dctcp__open();
+ if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
+ return;
+ strcpy(dctcp_skel->rodata->fallback, "cubic");
+ if (!ASSERT_OK(bpf_dctcp__load(dctcp_skel), "bpf_dctcp__load"))
+ goto done;
+
+ link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+ if (!ASSERT_OK_PTR(link, "dctcp link"))
+ goto done;
+
+ lfd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(lfd, 0, "lfd") ||
+ !ASSERT_OK(settcpca(lfd, "bpf_dctcp"), "lfd=>bpf_dctcp"))
+ goto done;
+
+ cli_fd = connect_to_fd_opts(lfd, &opts);
+ if (!ASSERT_GE(cli_fd, 0, "cli_fd"))
+ goto done;
+
+ srv_fd = accept(lfd, NULL, 0);
+ if (!ASSERT_GE(srv_fd, 0, "srv_fd"))
+ goto done;
+ ASSERT_STREQ(dctcp_skel->bss->cc_res, "cubic", "cc_res");
+ ASSERT_EQ(dctcp_skel->bss->tcp_cdg_res, -ENOTSUPP, "tcp_cdg_res");
+
+ err = getsockopt(srv_fd, SOL_TCP, TCP_CONGESTION, srv_cc, &cc_len);
+ if (!ASSERT_OK(err, "getsockopt(srv_fd, TCP_CONGESTION)"))
+ goto done;
+ ASSERT_STREQ(srv_cc, "cubic", "srv_fd cc");
+
+done:
+ bpf_link__destroy(link);
+ bpf_dctcp__destroy(dctcp_skel);
+ if (lfd != -1)
+ close(lfd);
+ if (srv_fd != -1)
+ close(srv_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+}
+
+static void test_rel_setsockopt(void)
+{
+ struct bpf_dctcp_release *rel_skel;
+ libbpf_print_fn_t old_print_fn;
+
+ err_str = "unknown func bpf_setsockopt";
+ found = false;
+
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+ rel_skel = bpf_dctcp_release__open_and_load();
+ libbpf_set_print(old_print_fn);
+
+ ASSERT_ERR_PTR(rel_skel, "rel_skel");
+ ASSERT_TRUE(found, "expected_err_msg");
+
+ bpf_dctcp_release__destroy(rel_skel);
+}
+
void test_bpf_tcp_ca(void)
{
if (test__start_subtest("dctcp"))
@@ -277,4 +331,8 @@ void test_bpf_tcp_ca(void)
test_cubic();
if (test__start_subtest("invalid_license"))
test_invalid_license();
+ if (test__start_subtest("dctcp_fallback"))
+ test_dctcp_fallback();
+ if (test__start_subtest("rel_setsockopt"))
+ test_rel_setsockopt();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 0457ae32b270..649f87382c8d 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3811,7 +3811,7 @@ static void do_test_raw(unsigned int test_num)
always_log);
free(raw_btf);
- err = ((btf_fd == -1) != test->btf_load_err);
+ err = ((btf_fd < 0) != test->btf_load_err);
if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
btf_fd, test->btf_load_err) ||
CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
@@ -3820,7 +3820,7 @@ static void do_test_raw(unsigned int test_num)
goto done;
}
- if (err || btf_fd == -1)
+ if (err || btf_fd < 0)
goto done;
create_attr.name = test->map_name;
@@ -3834,16 +3834,16 @@ static void do_test_raw(unsigned int test_num)
map_fd = bpf_create_map_xattr(&create_attr);
- err = ((map_fd == -1) != test->map_create_err);
+ err = ((map_fd < 0) != test->map_create_err);
CHECK(err, "map_fd:%d test->map_create_err:%u",
map_fd, test->map_create_err);
done:
if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
- if (map_fd != -1)
+ if (map_fd >= 0)
close(map_fd);
}
@@ -3941,7 +3941,7 @@ static int test_big_btf_info(unsigned int test_num)
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
always_log);
- if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -3987,7 +3987,7 @@ done:
free(raw_btf);
free(user_btf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
return err;
@@ -4029,7 +4029,7 @@ static int test_btf_id(unsigned int test_num)
btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
always_log);
- if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4043,7 +4043,7 @@ static int test_btf_id(unsigned int test_num)
}
btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id);
- if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd[1] < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4071,7 +4071,7 @@ static int test_btf_id(unsigned int test_num)
create_attr.btf_value_type_id = 2;
map_fd = bpf_create_map_xattr(&create_attr);
- if (CHECK(map_fd == -1, "errno:%d", errno)) {
+ if (CHECK(map_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4094,7 +4094,7 @@ static int test_btf_id(unsigned int test_num)
/* Test BTF ID is removed from the kernel */
btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
- if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4105,7 +4105,7 @@ static int test_btf_id(unsigned int test_num)
close(map_fd);
map_fd = -1;
btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
- if (CHECK(btf_fd[0] != -1, "BTF lingers")) {
+ if (CHECK(btf_fd[0] >= 0, "BTF lingers")) {
err = -1;
goto done;
}
@@ -4117,11 +4117,11 @@ done:
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
- if (map_fd != -1)
+ if (map_fd >= 0)
close(map_fd);
for (i = 0; i < 2; i++) {
free(user_btf[i]);
- if (btf_fd[i] != -1)
+ if (btf_fd[i] >= 0)
close(btf_fd[i]);
}
@@ -4166,7 +4166,7 @@ static void do_test_get_info(unsigned int test_num)
btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
btf_log_buf, BTF_LOG_BUF_SIZE,
always_log);
- if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd <= 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4212,7 +4212,7 @@ done:
free(raw_btf);
free(user_btf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
}
@@ -4249,8 +4249,9 @@ static void do_test_file(unsigned int test_num)
return;
btf = btf__parse_elf(test->file, &btf_ext);
- if (IS_ERR(btf)) {
- if (PTR_ERR(btf) == -ENOENT) {
+ err = libbpf_get_error(btf);
+ if (err) {
+ if (err == -ENOENT) {
printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC);
test__skip();
return;
@@ -4263,7 +4264,8 @@ static void do_test_file(unsigned int test_num)
btf_ext__free(btf_ext);
obj = bpf_object__open(test->file);
- if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
+ err = libbpf_get_error(obj);
+ if (CHECK(err, "obj: %d", err))
return;
prog = bpf_program__next(NULL, obj);
@@ -4298,7 +4300,7 @@ static void do_test_file(unsigned int test_num)
info_len = sizeof(struct bpf_prog_info);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
err = -1;
goto done;
@@ -4330,7 +4332,7 @@ static void do_test_file(unsigned int test_num)
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
err = -1;
goto done;
@@ -4348,7 +4350,8 @@ static void do_test_file(unsigned int test_num)
goto done;
}
- err = btf__get_from_id(info.btf_id, &btf);
+ btf = btf__load_from_kernel_by_id(info.btf_id);
+ err = libbpf_get_error(btf);
if (CHECK(err, "cannot get btf from kernel, err: %d", err))
goto done;
@@ -4384,6 +4387,7 @@ skip:
fprintf(stderr, "OK");
done:
+ btf__free(btf);
free(func_info);
bpf_object__close(obj);
}
@@ -4886,7 +4890,7 @@ static void do_test_pprint(int test_num)
always_log);
free(raw_btf);
- if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4901,7 +4905,7 @@ static void do_test_pprint(int test_num)
create_attr.btf_value_type_id = test->value_type_id;
map_fd = bpf_create_map_xattr(&create_attr);
- if (CHECK(map_fd == -1, "errno:%d", errno)) {
+ if (CHECK(map_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4982,7 +4986,7 @@ static void do_test_pprint(int test_num)
err = check_line(expected_line, nexpected_line,
sizeof(expected_line), line);
- if (err == -1)
+ if (err < 0)
goto done;
}
@@ -4998,7 +5002,7 @@ static void do_test_pprint(int test_num)
cpu, cmapv);
err = check_line(expected_line, nexpected_line,
sizeof(expected_line), line);
- if (err == -1)
+ if (err < 0)
goto done;
cmapv = cmapv + rounded_value_size;
@@ -5036,9 +5040,9 @@ done:
fprintf(stderr, "OK");
if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
- if (map_fd != -1)
+ if (map_fd >= 0)
close(map_fd);
if (pin_file)
fclose(pin_file);
@@ -5950,7 +5954,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
/* get necessary lens */
info_len = sizeof(struct bpf_prog_info);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
return -1;
}
@@ -5980,7 +5984,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
info.func_info_rec_size = rec_size;
info.func_info = ptr_to_u64(func_info);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
err = -1;
goto done;
@@ -6044,7 +6048,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
info_len = sizeof(struct bpf_prog_info);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "err:%d errno:%d", err, errno)) {
+ if (CHECK(err < 0, "err:%d errno:%d", err, errno)) {
err = -1;
goto done;
}
@@ -6123,7 +6127,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
* Only recheck the info.*line_info* fields.
* Other fields are not the concern of this test.
*/
- if (CHECK(err == -1 ||
+ if (CHECK(err < 0 ||
info.nr_line_info != cnt ||
(jited_cnt && !info.jited_line_info) ||
info.nr_jited_line_info != jited_cnt ||
@@ -6260,7 +6264,7 @@ static void do_test_info_raw(unsigned int test_num)
always_log);
free(raw_btf);
- if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) {
+ if (CHECK(btf_fd < 0, "invalid btf_fd errno:%d", errno)) {
err = -1;
goto done;
}
@@ -6273,7 +6277,8 @@ static void do_test_info_raw(unsigned int test_num)
patched_linfo = patch_name_tbd(test->line_info,
test->str_sec, linfo_str_off,
test->str_sec_size, &linfo_size);
- if (IS_ERR(patched_linfo)) {
+ err = libbpf_get_error(patched_linfo);
+ if (err) {
fprintf(stderr, "error in creating raw bpf_line_info");
err = -1;
goto done;
@@ -6297,7 +6302,7 @@ static void do_test_info_raw(unsigned int test_num)
}
prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
- err = ((prog_fd == -1) != test->expected_prog_load_failure);
+ err = ((prog_fd < 0) != test->expected_prog_load_failure);
if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d",
prog_fd, test->expected_prog_load_failure, errno) ||
CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
@@ -6306,7 +6311,7 @@ static void do_test_info_raw(unsigned int test_num)
goto done;
}
- if (prog_fd == -1)
+ if (prog_fd < 0)
goto done;
err = test_get_finfo(test, prog_fd);
@@ -6323,12 +6328,12 @@ done:
if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
- if (prog_fd != -1)
+ if (prog_fd >= 0)
close(prog_fd);
- if (!IS_ERR(patched_linfo))
+ if (!libbpf_get_error(patched_linfo))
free(patched_linfo);
}
@@ -6839,9 +6844,9 @@ static void do_test_dedup(unsigned int test_num)
return;
test_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+ err = libbpf_get_error(test_btf);
free(raw_btf);
- if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld",
- PTR_ERR(test_btf))) {
+ if (CHECK(err, "invalid test_btf errno:%d", err)) {
err = -1;
goto done;
}
@@ -6853,9 +6858,9 @@ static void do_test_dedup(unsigned int test_num)
if (!raw_btf)
return;
expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+ err = libbpf_get_error(expect_btf);
free(raw_btf);
- if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld",
- PTR_ERR(expect_btf))) {
+ if (CHECK(err, "invalid expect_btf errno:%d", err)) {
err = -1;
goto done;
}
@@ -6966,10 +6971,8 @@ static void do_test_dedup(unsigned int test_num)
}
done:
- if (!IS_ERR(test_btf))
- btf__free(test_btf);
- if (!IS_ERR(expect_btf))
- btf__free(expect_btf);
+ btf__free(test_btf);
+ btf__free(expect_btf);
}
void test_btf(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 5e129dc2073c..52ccf0cf35e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -32,8 +32,9 @@ static int btf_dump_all_types(const struct btf *btf,
int err = 0, id;
d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
- if (IS_ERR(d))
- return PTR_ERR(d);
+ err = libbpf_get_error(d);
+ if (err)
+ return err;
for (id = 1; id <= type_cnt; id++) {
err = btf_dump__dump_type(d, id);
@@ -56,8 +57,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
snprintf(test_file, sizeof(test_file), "%s.o", t->file);
btf = btf__parse_elf(test_file, NULL);
- if (CHECK(IS_ERR(btf), "btf_parse_elf",
- "failed to load test BTF: %ld\n", PTR_ERR(btf))) {
+ if (!ASSERT_OK_PTR(btf, "btf_parse_elf")) {
err = -PTR_ERR(btf);
btf = NULL;
goto done;
@@ -232,7 +232,593 @@ err_out:
btf__free(btf);
}
+#define STRSIZE 4096
+
+static void btf_dump_snprintf(void *ctx, const char *fmt, va_list args)
+{
+ char *s = ctx, new[STRSIZE];
+
+ vsnprintf(new, STRSIZE, fmt, args);
+ if (strlen(s) < STRSIZE)
+ strncat(s, new, STRSIZE - strlen(s) - 1);
+}
+
+static int btf_dump_data(struct btf *btf, struct btf_dump *d,
+ char *name, char *prefix, __u64 flags, void *ptr,
+ size_t ptr_sz, char *str, const char *expected_val)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ size_t type_sz;
+ __s32 type_id;
+ int ret = 0;
+
+ if (flags & BTF_F_COMPACT)
+ opts.compact = true;
+ if (flags & BTF_F_NONAME)
+ opts.skip_names = true;
+ if (flags & BTF_F_ZERO)
+ opts.emit_zeroes = true;
+ if (prefix) {
+ ASSERT_STRNEQ(name, prefix, strlen(prefix),
+ "verify prefix match");
+ name += strlen(prefix) + 1;
+ }
+ type_id = btf__find_by_name(btf, name);
+ if (!ASSERT_GE(type_id, 0, "find type id"))
+ return -ENOENT;
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+ ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, &opts);
+ if (type_sz <= ptr_sz) {
+ if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
+ return -EINVAL;
+ } else {
+ if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG"))
+ return -EINVAL;
+ }
+ if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match"))
+ return -EFAULT;
+ return 0;
+}
+
+#define TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \
+ _expected, ...) \
+ do { \
+ char __ptrtype[64] = #_type; \
+ char *_ptrtype = (char *)__ptrtype; \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _ptrtype, _prefix, _flags, \
+ _ptr, sizeof(_type), _str, \
+ _expected); \
+ } while (0)
+
+/* Use where expected data string matches its stringified declaration */
+#define TEST_BTF_DUMP_DATA_C(_b, _d, _prefix, _str, _type, _flags, \
+ ...) \
+ TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \
+ "(" #_type ")" #__VA_ARGS__, __VA_ARGS__)
+
+/* overflow test; pass typesize < expected type size, ensure E2BIG returned */
+#define TEST_BTF_DUMP_DATA_OVER(_b, _d, _prefix, _str, _type, _type_sz, \
+ _expected, ...) \
+ do { \
+ char __ptrtype[64] = #_type; \
+ char *_ptrtype = (char *)__ptrtype; \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _ptrtype, _prefix, 0, \
+ _ptr, _type_sz, _str, _expected); \
+ } while (0)
+
+#define TEST_BTF_DUMP_VAR(_b, _d, _prefix, _str, _var, _type, _flags, \
+ _expected, ...) \
+ do { \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _var, _prefix, _flags, \
+ _ptr, sizeof(_type), _str, \
+ _expected); \
+ } while (0)
+
+static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+#ifdef __SIZEOF_INT128__
+ __int128 i = 0xffffffffffffffff;
+
+ /* this dance is required because we cannot directly initialize
+ * a 128-bit value to anything larger than a 64-bit value.
+ */
+ i = (i << 64) | (i - 1);
+#endif
+ /* simple int */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, 1234);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "1234", 1234);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)1234", 1234);
+
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT, "(int)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_ZERO,
+ "(int)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, -4567);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "-4567", -4567);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)-4567", -4567);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, int, sizeof(int)-1, "", 1);
+
+#ifdef __SIZEOF_INT128__
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128, BTF_F_COMPACT,
+ "(__int128)0xffffffffffffffff",
+ 0xffffffffffffffff);
+ ASSERT_OK(btf_dump_data(btf, d, "__int128", NULL, 0, &i, 16, str,
+ "(__int128)0xfffffffffffffffffffffffffffffffe"),
+ "dump __int128");
+#endif
+}
+
+static void test_btf_dump_float_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ float t1 = 1.234567;
+ float t2 = -1.234567;
+ float t3 = 0.0;
+ double t4 = 5.678912;
+ double t5 = -5.678912;
+ double t6 = 0.0;
+ long double t7 = 9.876543;
+ long double t8 = -9.876543;
+ long double t9 = 0.0;
+
+ /* since the kernel does not likely have any float types in its BTF, we
+ * will need to add some of various sizes.
+ */
+
+ ASSERT_GT(btf__add_float(btf, "test_float", 4), 0, "add float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t1, 4, str,
+ "(test_float)1.234567"), "dump float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t2, 4, str,
+ "(test_float)-1.234567"), "dump float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t3, 4, str,
+ "(test_float)0.000000"), "dump float");
+
+ ASSERT_GT(btf__add_float(btf, "test_double", 8), 0, "add_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t4, 8, str,
+ "(test_double)5.678912"), "dump double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t5, 8, str,
+ "(test_double)-5.678912"), "dump double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t6, 8, str,
+ "(test_double)0.000000"), "dump double");
+
+ ASSERT_GT(btf__add_float(btf, "test_long_double", 16), 0, "add long double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t7, 16,
+ str, "(test_long_double)9.876543"),
+ "dump long_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t8, 16,
+ str, "(test_long_double)-9.876543"),
+ "dump long_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t9, 16,
+ str, "(test_long_double)0.000000"),
+ "dump long_double");
+}
+
+static void test_btf_dump_char_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* simple char */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, char, BTF_F_COMPACT, 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+ "100", 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)100", 100);
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT,
+ "(char)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_ZERO,
+ "(char)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)0", 0);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, char, sizeof(char)-1, "", 100);
+}
+
+static void test_btf_dump_typedef_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* simple typedef */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, uint64_t, BTF_F_COMPACT, 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+ "1", 1);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)1", 1);
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT, "(u64)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_ZERO,
+ "(u64)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)0", 0);
+
+ /* typedef struct */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, atomic_t, BTF_F_COMPACT,
+ {.counter = (int)1,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+ "{1,}", { .counter = 1 });
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+" .counter = (int)1,\n"
+"}",
+ {.counter = 1,});
+ /* typedef with 0 value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT, "(atomic_t){}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+ "{}", {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+"}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_ZERO,
+ "(atomic_t){.counter = (int)0,}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "{0,}", {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_ZERO,
+"(atomic_t){\n"
+" .counter = (int)0,\n"
+"}",
+ { .counter = 0,});
+
+ /* overflow should show type but not value since it overflows */
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, atomic_t, sizeof(atomic_t)-1,
+ "(atomic_t){\n", { .counter = 1});
+}
+
+static void test_btf_dump_enum_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* enum where enum value does (and does not) exist */
+ TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+ "(enum bpf_cmd)BPF_MAP_CREATE", 0);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+ "(enum bpf_cmd)BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "BPF_MAP_CREATE", 0);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_ZERO,
+ "(enum bpf_cmd)BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "BPF_MAP_CREATE", BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, 2000);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "2000", 2000);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+ "(enum bpf_cmd)2000", 2000);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "enum", str, enum bpf_cmd,
+ sizeof(enum bpf_cmd) - 1, "", BPF_MAP_CREATE);
+}
+
+static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ char zero_data[512] = { };
+ char type_data[512];
+ void *fops = type_data;
+ void *skb = type_data;
+ size_t type_sz;
+ __s32 type_id;
+ char *cmpstr;
+ int ret;
+
+ memset(type_data, 255, sizeof(type_data));
+
+ /* simple struct */
+ TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+ {.name_off = (__u32)3,.val = (__s32)-1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{3,-1,}",
+ { .name_off = 3, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+" .name_off = (__u32)3,\n"
+" .val = (__s32)-1,\n"
+"}",
+ { .name_off = 3, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{-1,}",
+ { .name_off = 0, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "{0,-1,}",
+ { .name_off = 0, .val = -1,});
+ /* empty struct should be printed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+ "(struct btf_enum){}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+"}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_ZERO,
+ "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_ZERO,
+"(struct btf_enum){\n"
+" .name_off = (__u32)0,\n"
+" .val = (__s32)0,\n"
+"}",
+ { .name_off = 0, .val = 0,});
+
+ /* struct with pointers */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+ "(struct list_head){.next = (struct list_head *)0x1,}",
+ { .next = (struct list_head *)1 });
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+" .next = (struct list_head *)0x1,\n"
+"}",
+ { .next = (struct list_head *)1 });
+ /* NULL pointer should not be displayed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+ "(struct list_head){}",
+ { .next = (struct list_head *)0 });
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+"}",
+ { .next = (struct list_head *)0 });
+
+ /* struct with function pointers */
+ type_id = btf__find_by_name(btf, "file_operations");
+ if (ASSERT_GT(type_id, 0, "find type id")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ ret = btf_dump__dump_type_data(d, type_id, fops, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping file_operations");
+ cmpstr =
+"(struct file_operations){\n"
+" .owner = (struct module *)0xffffffffffffffff,\n"
+" .llseek = (loff_t (*)(struct file *, loff_t, int))0xffffffffffffffff,";
+
+ ASSERT_STRNEQ(str, cmpstr, strlen(cmpstr), "file_operations");
+ }
+
+ /* struct with char array */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){.name = (char[16])['f','o','o',],}",
+ { .name = "foo",});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{['f','o','o',],}",
+ {.name = "foo",});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, 0,
+"(struct bpf_prog_info){\n"
+" .name = (char[16])[\n"
+" 'f',\n"
+" 'o',\n"
+" 'o',\n"
+" ],\n"
+"}",
+ {.name = "foo",});
+ /* leading null char means do not display string */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){}",
+ {.name = {'\0', 'f', 'o', 'o'}});
+ /* handle non-printable characters */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){.name = (char[16])[1,2,3,],}",
+ { .name = {1, 2, 3, 0}});
+
+ /* struct with non-char array */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+ "(struct __sk_buff){.cb = (__u32[5])[1,2,3,4,5,],}",
+ { .cb = {1, 2, 3, 4, 5,},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{[1,2,3,4,5,],}",
+ { .cb = { 1, 2, 3, 4, 5},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+" .cb = (__u32[5])[\n"
+" 1,\n"
+" 2,\n"
+" 3,\n"
+" 4,\n"
+" 5,\n"
+" ],\n"
+"}",
+ { .cb = { 1, 2, 3, 4, 5},});
+ /* For non-char, arrays, show non-zero values only */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+ "(struct __sk_buff){.cb = (__u32[5])[0,0,1,0,0,],}",
+ { .cb = { 0, 0, 1, 0, 0},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+" .cb = (__u32[5])[\n"
+" 0,\n"
+" 0,\n"
+" 1,\n"
+" 0,\n"
+" 0,\n"
+" ],\n"
+"}",
+ { .cb = { 0, 0, 1, 0, 0},});
+
+ /* struct with bitfields */
+ TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+ {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{1,0x2,0x3,4,5,}",
+ { .code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4,
+ .imm = 5,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, 0,
+"(struct bpf_insn){\n"
+" .code = (__u8)1,\n"
+" .dst_reg = (__u8)0x2,\n"
+" .src_reg = (__u8)0x3,\n"
+" .off = (__s16)4,\n"
+" .imm = (__s32)5,\n"
+"}",
+ {.code = 1, .dst_reg = 2, .src_reg = 3, .off = 4, .imm = 5});
+
+ /* zeroed bitfields should not be displayed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+ "(struct bpf_insn){.dst_reg = (__u8)0x1,}",
+ { .code = 0, .dst_reg = 1});
+
+ /* struct with enum bitfield */
+ type_id = btf__find_by_name(btf, "fs_context");
+ if (ASSERT_GT(type_id, 0, "find fs_context")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ opts.emit_zeroes = true;
+ ret = btf_dump__dump_type_data(d, type_id, zero_data, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping fs_context");
+
+ ASSERT_NEQ(strstr(str, "FS_CONTEXT_FOR_MOUNT"), NULL,
+ "bitfield value not present");
+ }
+
+ /* struct with nested anon union */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_sock_ops, BTF_F_COMPACT,
+ "(struct bpf_sock_ops){.op = (__u32)1,(union){.args = (__u32[4])[1,2,3,4,],.reply = (__u32)1,.replylong = (__u32[4])[1,2,3,4,],},}",
+ { .op = 1, .args = { 1, 2, 3, 4}});
+
+ /* union with nested struct */
+ TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT,
+ "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},}",
+ { .map = { .map_fd = 1 }});
+
+ /* struct skb with nested structs/unions; because type output is so
+ * complex, we don't do a string comparison, just verify we return
+ * the type size as the amount of data displayed.
+ */
+ type_id = btf__find_by_name(btf, "sk_buff");
+ if (ASSERT_GT(type_id, 0, "find struct sk_buff")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ ret = btf_dump__dump_type_data(d, type_id, skb, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping sk_buff");
+ }
+
+ /* overflow bpf_sock_ops struct with final element nonzero/zero.
+ * Regardless of the value of the final field, we don't have all the
+ * data we need to display it, so we should trigger an overflow.
+ * In other words oveflow checking should trump "is field zero?"
+ * checks because if we've overflowed, it shouldn't matter what the
+ * field is - we can't trust its value so shouldn't display it.
+ */
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+ sizeof(struct bpf_sock_ops) - 1,
+ "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+ { .op = 1, .skb_tcp_flags = 2});
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+ sizeof(struct bpf_sock_ops) - 1,
+ "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+ { .op = 1, .skb_tcp_flags = 0});
+}
+
+static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT,
+ "int cpu_number = (int)100", 100);
+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT,
+ "static int cpu_profile_flip = (int)2", 2);
+}
+
+static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str,
+ const char *name, const char *expected_val,
+ void *data, size_t data_sz)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ int ret = 0, cmp;
+ size_t secsize;
+ __s32 type_id;
+
+ opts.compact = true;
+
+ type_id = btf__find_by_name(btf, name);
+ if (!ASSERT_GT(type_id, 0, "find type id"))
+ return;
+
+ secsize = btf__resolve_size(btf, type_id);
+ ASSERT_EQ(secsize, 0, "verify section size");
+
+ str[0] = '\0';
+ ret = btf_dump__dump_type_data(d, type_id, data, data_sz, &opts);
+ ASSERT_EQ(ret, 0, "unexpected return value");
+
+ cmp = strcmp(str, expected_val);
+ ASSERT_EQ(cmp, 0, "ensure expected/actual match");
+}
+
+static void test_btf_dump_datasec_data(char *str)
+{
+ struct btf *btf = btf__parse("xdping_kern.o", NULL);
+ struct btf_dump_opts opts = { .ctx = str };
+ char license[4] = "GPL";
+ struct btf_dump *d;
+
+ if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found"))
+ return;
+
+ d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ return;
+
+ test_btf_datasec(btf, d, str, "license",
+ "SEC(\"license\") char[4] _license = (char[4])['G','P','L',];",
+ license, sizeof(license));
+}
+
void test_btf_dump() {
+ char str[STRSIZE];
+ struct btf_dump_opts opts = { .ctx = str };
+ struct btf_dump *d;
+ struct btf *btf;
int i;
for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) {
@@ -245,4 +831,33 @@ void test_btf_dump() {
}
if (test__start_subtest("btf_dump: incremental"))
test_btf_dump_incremental();
+
+ btf = libbpf_find_kernel_btf();
+ if (!ASSERT_OK_PTR(btf, "no kernel BTF found"))
+ return;
+
+ d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ return;
+
+ /* Verify type display for various types. */
+ if (test__start_subtest("btf_dump: int_data"))
+ test_btf_dump_int_data(btf, d, str);
+ if (test__start_subtest("btf_dump: float_data"))
+ test_btf_dump_float_data(btf, d, str);
+ if (test__start_subtest("btf_dump: char_data"))
+ test_btf_dump_char_data(btf, d, str);
+ if (test__start_subtest("btf_dump: typedef_data"))
+ test_btf_dump_typedef_data(btf, d, str);
+ if (test__start_subtest("btf_dump: enum_data"))
+ test_btf_dump_enum_data(btf, d, str);
+ if (test__start_subtest("btf_dump: struct_data"))
+ test_btf_dump_struct_data(btf, d, str);
+ if (test__start_subtest("btf_dump: var_data"))
+ test_btf_dump_var_data(btf, d, str);
+ btf_dump__free(d);
+ btf__free(btf);
+
+ if (test__start_subtest("btf_dump: datasec_data"))
+ test_btf_dump_datasec_data(str);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_module.c b/tools/testing/selftests/bpf/prog_tests/btf_module.c
new file mode 100644
index 000000000000..2239d1fe0332
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_module.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static const char *module_name = "bpf_testmod";
+static const char *symbol_name = "bpf_testmod_test_read";
+
+void test_btf_module()
+{
+ struct btf *vmlinux_btf, *module_btf;
+ __s32 type_id;
+
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
+ return;
+
+ module_btf = btf__load_module_btf(module_name, vmlinux_btf);
+ if (!ASSERT_OK_PTR(module_btf, "could not load module BTF"))
+ goto cleanup;
+
+ type_id = btf__find_by_name(module_btf, symbol_name);
+ ASSERT_GT(type_id, 0, "func not found");
+
+cleanup:
+ btf__free(module_btf);
+ btf__free(vmlinux_btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
index f36da15b134f..022c7d89d6f4 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_write.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -4,8 +4,6 @@
#include <bpf/btf.h>
#include "btf_helpers.h"
-static int duration = 0;
-
void test_btf_write() {
const struct btf_var_secinfo *vi;
const struct btf_type *t;
@@ -16,7 +14,7 @@ void test_btf_write() {
int id, err, str_off;
btf = btf__new_empty();
- if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf)))
+ if (!ASSERT_OK_PTR(btf, "new_empty"))
return;
str_off = btf__find_str(btf, "int");
diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
index 643dfa35419c..876be0ecb654 100644
--- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
@@ -102,8 +102,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
*/
parent_link = bpf_program__attach_cgroup(obj->progs.egress,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_link), "parent-cg-attach",
- "err %ld", PTR_ERR(parent_link)))
+ if (!ASSERT_OK_PTR(parent_link, "parent-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -126,8 +125,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
*/
child_link = bpf_program__attach_cgroup(obj->progs.egress,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_link), "child-cg-attach",
- "err %ld", PTR_ERR(child_link)))
+ if (!ASSERT_OK_PTR(child_link, "child-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -147,10 +145,8 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
goto close_bpf_object;
close_bpf_object:
- if (!IS_ERR(parent_link))
- bpf_link__destroy(parent_link);
- if (!IS_ERR(child_link))
- bpf_link__destroy(child_link);
+ bpf_link__destroy(parent_link);
+ bpf_link__destroy(child_link);
cg_storage_multi_egress_only__destroy(obj);
}
@@ -176,18 +172,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
*/
parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
- "err %ld", PTR_ERR(parent_egress1_link)))
+ if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
goto close_bpf_object;
parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
- "err %ld", PTR_ERR(parent_egress2_link)))
+ if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
goto close_bpf_object;
parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
- "err %ld", PTR_ERR(parent_ingress_link)))
+ if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -221,18 +214,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
*/
child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
- "err %ld", PTR_ERR(child_egress1_link)))
+ if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
goto close_bpf_object;
child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
- "err %ld", PTR_ERR(child_egress2_link)))
+ if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
goto close_bpf_object;
child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
- "err %ld", PTR_ERR(child_ingress_link)))
+ if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -264,18 +254,12 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
goto close_bpf_object;
close_bpf_object:
- if (!IS_ERR(parent_egress1_link))
- bpf_link__destroy(parent_egress1_link);
- if (!IS_ERR(parent_egress2_link))
- bpf_link__destroy(parent_egress2_link);
- if (!IS_ERR(parent_ingress_link))
- bpf_link__destroy(parent_ingress_link);
- if (!IS_ERR(child_egress1_link))
- bpf_link__destroy(child_egress1_link);
- if (!IS_ERR(child_egress2_link))
- bpf_link__destroy(child_egress2_link);
- if (!IS_ERR(child_ingress_link))
- bpf_link__destroy(child_ingress_link);
+ bpf_link__destroy(parent_egress1_link);
+ bpf_link__destroy(parent_egress2_link);
+ bpf_link__destroy(parent_ingress_link);
+ bpf_link__destroy(child_egress1_link);
+ bpf_link__destroy(child_egress2_link);
+ bpf_link__destroy(child_ingress_link);
cg_storage_multi_isolated__destroy(obj);
}
@@ -301,18 +285,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
*/
parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
- "err %ld", PTR_ERR(parent_egress1_link)))
+ if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
goto close_bpf_object;
parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
- "err %ld", PTR_ERR(parent_egress2_link)))
+ if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
goto close_bpf_object;
parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
parent_cgroup_fd);
- if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
- "err %ld", PTR_ERR(parent_ingress_link)))
+ if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -338,18 +319,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
*/
child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
- "err %ld", PTR_ERR(child_egress1_link)))
+ if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
goto close_bpf_object;
child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
- "err %ld", PTR_ERR(child_egress2_link)))
+ if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
goto close_bpf_object;
child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
child_cgroup_fd);
- if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
- "err %ld", PTR_ERR(child_ingress_link)))
+ if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
goto close_bpf_object;
err = connect_send(CHILD_CGROUP);
if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -375,18 +353,12 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
goto close_bpf_object;
close_bpf_object:
- if (!IS_ERR(parent_egress1_link))
- bpf_link__destroy(parent_egress1_link);
- if (!IS_ERR(parent_egress2_link))
- bpf_link__destroy(parent_egress2_link);
- if (!IS_ERR(parent_ingress_link))
- bpf_link__destroy(parent_ingress_link);
- if (!IS_ERR(child_egress1_link))
- bpf_link__destroy(child_egress1_link);
- if (!IS_ERR(child_egress2_link))
- bpf_link__destroy(child_egress2_link);
- if (!IS_ERR(child_ingress_link))
- bpf_link__destroy(child_ingress_link);
+ bpf_link__destroy(parent_egress1_link);
+ bpf_link__destroy(parent_egress2_link);
+ bpf_link__destroy(parent_ingress_link);
+ bpf_link__destroy(child_egress1_link);
+ bpf_link__destroy(child_egress2_link);
+ bpf_link__destroy(child_ingress_link);
cg_storage_multi_shared__destroy(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
index 0a1fc9816cef..20bb8831dda6 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -167,7 +167,7 @@ void test_cgroup_attach_multi(void)
prog_cnt = 2;
CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
BPF_F_QUERY_EFFECTIVE, &attach_flags,
- prog_ids, &prog_cnt) != -1);
+ prog_ids, &prog_cnt) >= 0);
CHECK_FAIL(errno != ENOSPC);
CHECK_FAIL(prog_cnt != 4);
/* check that prog_ids are returned even when buffer is too small */
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
index 736796e56ed1..9091524131d6 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -65,8 +65,7 @@ void test_cgroup_link(void)
for (i = 0; i < cg_nr; i++) {
links[i] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[i].fd);
- if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n",
- i, PTR_ERR(links[i])))
+ if (!ASSERT_OK_PTR(links[i], "cg_attach"))
goto cleanup;
}
@@ -121,8 +120,7 @@ void test_cgroup_link(void)
links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
- if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
- PTR_ERR(links[last_cg])))
+ if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
goto cleanup;
ping_and_check(cg_nr + 1, 0);
@@ -147,7 +145,7 @@ void test_cgroup_link(void)
/* attempt to mix in with multi-attach bpf_link */
tmp_link = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
- if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) {
+ if (!ASSERT_ERR_PTR(tmp_link, "cg_attach_fail")) {
bpf_link__destroy(tmp_link);
goto cleanup;
}
@@ -165,8 +163,7 @@ void test_cgroup_link(void)
/* attach back link-based one */
links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
- if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
- PTR_ERR(links[last_cg])))
+ if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
goto cleanup;
ping_and_check(cg_nr, 0);
@@ -249,8 +246,7 @@ cleanup:
BPF_CGROUP_INET_EGRESS);
for (i = 0; i < cg_nr; i++) {
- if (!IS_ERR(links[i]))
- bpf_link__destroy(links[i]);
+ bpf_link__destroy(links[i]);
}
test_cgroup_link__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
index 464edc1c1708..b9dc4ec655b5 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
@@ -60,7 +60,7 @@ static void run_cgroup_bpf_test(const char *cg_path, int out_sk)
goto cleanup;
link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd);
- if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "cgroup_attach"))
goto cleanup;
run_lookup_test(&skel->bss->g_serv_port, out_sk);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
new file mode 100644
index 000000000000..ab3b9bc5e6d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "connect4_dropper.skel.h"
+
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+static int run_test(int cgroup_fd, int server_fd, bool classid)
+{
+ struct network_helper_opts opts = {
+ .must_fail = true,
+ };
+ struct connect4_dropper *skel;
+ int fd, err = 0;
+
+ skel = connect4_dropper__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return -1;
+
+ skel->links.connect_v4_dropper =
+ bpf_program__attach_cgroup(skel->progs.connect_v4_dropper,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.connect_v4_dropper, "prog_attach")) {
+ err = -1;
+ goto out;
+ }
+
+ if (classid && !ASSERT_OK(join_classid(), "join_classid")) {
+ err = -1;
+ goto out;
+ }
+
+ fd = connect_to_fd_opts(server_fd, &opts);
+ if (fd < 0)
+ err = -1;
+ else
+ close(fd);
+out:
+ connect4_dropper__destroy(skel);
+ return err;
+}
+
+void test_cgroup_v1v2(void)
+{
+ struct network_helper_opts opts = {};
+ int server_fd, client_fd, cgroup_fd;
+ static const int port = 60123;
+
+ /* Step 1: Check base connectivity works without any BPF. */
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+ if (!ASSERT_GE(server_fd, 0, "server_fd"))
+ return;
+ client_fd = connect_to_fd_opts(server_fd, &opts);
+ if (!ASSERT_GE(client_fd, 0, "client_fd")) {
+ close(server_fd);
+ return;
+ }
+ close(client_fd);
+ close(server_fd);
+
+ /* Step 2: Check BPF policy prog attached to cgroups drops connectivity. */
+ cgroup_fd = test__join_cgroup("/connect_dropper");
+ if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
+ return;
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+ if (!ASSERT_GE(server_fd, 0, "server_fd")) {
+ close(cgroup_fd);
+ return;
+ }
+ ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only");
+ setup_classid_environment();
+ set_classid(42);
+ ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2");
+ cleanup_classid_environment();
+ close(server_fd);
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index b62a39315336..012068f33a0a 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -53,7 +53,7 @@ static void test_check_mtu_xdp_attach(void)
prog = skel->progs.xdp_use_helper_basic;
link = bpf_program__attach_xdp(prog, IFINDEX_LO);
- if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto out;
skel->links.xdp_use_helper_basic = link;
diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
index 981c251453d9..3d4b2a358d47 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
@@ -53,8 +53,8 @@ void test_core_autosize(void)
char btf_file[] = "/tmp/core_autosize.btf.XXXXXX";
int err, fd = -1, zero = 0;
int char_id, short_id, int_id, long_long_id, void_ptr_id, id;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
struct test_core_autosize* skel = NULL;
- struct bpf_object_load_attr load_attr = {};
struct bpf_program *prog;
struct bpf_map *bss_map;
struct btf *btf = NULL;
@@ -125,9 +125,10 @@ void test_core_autosize(void)
fd = -1;
/* open and load BPF program with custom BTF as the kernel BTF */
- skel = test_core_autosize__open();
+ open_opts.btf_custom_path = btf_file;
+ skel = test_core_autosize__open_opts(&open_opts);
if (!ASSERT_OK_PTR(skel, "skel_open"))
- return;
+ goto cleanup;
/* disable handle_signed() for now */
prog = bpf_object__find_program_by_name(skel->obj, "handle_signed");
@@ -135,9 +136,7 @@ void test_core_autosize(void)
goto cleanup;
bpf_program__set_autoload(prog, false);
- load_attr.obj = skel->obj;
- load_attr.target_btf_path = btf_file;
- err = bpf_object__load_xattr(&load_attr);
+ err = bpf_object__load(skel->obj);
if (!ASSERT_OK(err, "prog_load"))
goto cleanup;
@@ -204,14 +203,13 @@ void test_core_autosize(void)
skel = NULL;
/* now re-load with handle_signed() enabled, it should fail loading */
- skel = test_core_autosize__open();
+ open_opts.btf_custom_path = btf_file;
+ skel = test_core_autosize__open_opts(&open_opts);
if (!ASSERT_OK_PTR(skel, "skel_open"))
- return;
+ goto cleanup;
- load_attr.obj = skel->obj;
- load_attr.target_btf_path = btf_file;
- err = bpf_object__load_xattr(&load_attr);
- if (!ASSERT_ERR(err, "bad_prog_load"))
+ err = test_core_autosize__load(skel);
+ if (!ASSERT_ERR(err, "skel_load"))
goto cleanup;
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 607710826dca..4739b15b2a97 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -369,8 +369,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test)
const char *name;
int i;
- if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) ||
- CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) {
+ if (!ASSERT_OK_PTR(local_btf, "local_btf") || !ASSERT_OK_PTR(targ_btf, "targ_btf")) {
btf__free(local_btf);
btf__free(targ_btf);
return -EINVAL;
@@ -817,7 +816,7 @@ static size_t roundup_page(size_t sz)
void test_core_reloc(void)
{
const size_t mmap_sz = roundup_page(sizeof(struct data));
- struct bpf_object_load_attr load_attr = {};
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
struct core_reloc_test_case *test_case;
const char *tp_name, *probe_name;
int err, i, equal;
@@ -847,10 +846,16 @@ void test_core_reloc(void)
continue;
}
- obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
- if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
- test_case->bpf_obj_file, PTR_ERR(obj)))
- continue;
+ if (test_case->btf_src_file) {
+ err = access(test_case->btf_src_file, R_OK);
+ if (!ASSERT_OK(err, "btf_src_file"))
+ goto cleanup;
+ }
+
+ open_opts.btf_custom_path = test_case->btf_src_file;
+ obj = bpf_object__open_file(test_case->bpf_obj_file, &open_opts);
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
+ goto cleanup;
probe_name = "raw_tracepoint/sys_enter";
tp_name = "sys_enter";
@@ -864,17 +869,7 @@ void test_core_reloc(void)
"prog '%s' not found\n", probe_name))
goto cleanup;
-
- if (test_case->btf_src_file) {
- err = access(test_case->btf_src_file, R_OK);
- if (!ASSERT_OK(err, "btf_src_file"))
- goto cleanup;
- }
-
- load_attr.obj = obj;
- load_attr.log_level = 0;
- load_attr.target_btf_path = test_case->btf_src_file;
- err = bpf_object__load_xattr(&load_attr);
+ err = bpf_object__load(obj);
if (err) {
if (!test_case->fails)
ASSERT_OK(err, "obj_load");
@@ -899,8 +894,7 @@ void test_core_reloc(void)
data->my_pid_tgid = my_pid_tgid;
link = bpf_program__attach_raw_tracepoint(prog, tp_name);
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto cleanup;
/* trigger test run */
@@ -941,10 +935,8 @@ cleanup:
CHECK_FAIL(munmap(mmap_data, mmap_sz));
mmap_data = NULL;
}
- if (!IS_ERR_OR_NULL(link)) {
- bpf_link__destroy(link);
- link = NULL;
- }
+ bpf_link__destroy(link);
+ link = NULL;
bpf_object__close(obj);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 109d0345a2be..91154c2ba256 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "fentry_test.skel.h"
-#include "fexit_test.skel.h"
+#include "fentry_test.lskel.h"
+#include "fexit_test.lskel.h"
void test_fentry_fexit(void)
{
@@ -26,7 +26,7 @@ void test_fentry_fexit(void)
if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
goto close_prog;
- prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+ prog_fd = fexit_skel->progs.test1.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
CHECK(err || retval, "ipv6",
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 7cb111b11995..174c89e7456e 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -1,13 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "fentry_test.skel.h"
+#include "fentry_test.lskel.h"
static int fentry_test(struct fentry_test *fentry_skel)
{
int err, prog_fd, i;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
__u64 *result;
err = fentry_test__attach(fentry_skel);
@@ -15,11 +15,11 @@ static int fentry_test(struct fentry_test *fentry_skel)
return err;
/* Check that already linked program can't be attached again. */
- link = bpf_program__attach(fentry_skel->progs.test1);
- if (!ASSERT_ERR_PTR(link, "fentry_attach_link"))
+ link_fd = fentry_test__test1__attach(fentry_skel);
+ if (!ASSERT_LT(link_fd, 0, "fentry_attach_link"))
return -1;
- prog_fd = bpf_program__fd(fentry_skel->progs.test1);
+ prog_fd = fentry_skel->progs.test1.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
ASSERT_OK(err, "test_run");
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 63990842d20f..73b4c76e6b86 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -146,10 +146,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
close_prog:
for (i = 0; i < prog_cnt; i++)
- if (!IS_ERR_OR_NULL(link[i]))
- bpf_link__destroy(link[i]);
- if (!IS_ERR_OR_NULL(obj))
- bpf_object__close(obj);
+ bpf_link__destroy(link[i]);
+ bpf_object__close(obj);
bpf_object__close(tgt_obj);
free(link);
free(prog);
@@ -231,7 +229,7 @@ static int test_second_attach(struct bpf_object *obj)
return err;
link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name);
- if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd))
+ if (!ASSERT_OK_PTR(link, "second_link"))
goto out;
err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
@@ -283,9 +281,7 @@ static void test_fmod_ret_freplace(void)
opts.attach_prog_fd = pkt_fd;
freplace_obj = bpf_object__open_file(freplace_name, &opts);
- if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open",
- "failed to open %s: %ld\n", freplace_name,
- PTR_ERR(freplace_obj)))
+ if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open"))
goto out;
err = bpf_object__load(freplace_obj);
@@ -294,14 +290,12 @@ static void test_fmod_ret_freplace(void)
prog = bpf_program__next(NULL, freplace_obj);
freplace_link = bpf_program__attach_trace(prog);
- if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n"))
+ if (!ASSERT_OK_PTR(freplace_link, "freplace_attach_trace"))
goto out;
opts.attach_prog_fd = bpf_program__fd(prog);
fmod_obj = bpf_object__open_file(fmod_ret_name, &opts);
- if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open",
- "failed to open %s: %ld\n", fmod_ret_name,
- PTR_ERR(fmod_obj)))
+ if (!ASSERT_OK_PTR(fmod_obj, "fmod_obj_open"))
goto out;
err = bpf_object__load(fmod_obj);
@@ -350,9 +344,7 @@ static void test_obj_load_failure_common(const char *obj_file,
);
obj = bpf_object__open_file(obj_file, &opts);
- if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
- "failed to open %s: %ld\n", obj_file,
- PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
goto close_prog;
/* It should fail to load the program */
@@ -361,8 +353,7 @@ static void test_obj_load_failure_common(const char *obj_file,
goto close_prog;
close_prog:
- if (!IS_ERR_OR_NULL(obj))
- bpf_object__close(obj);
+ bpf_object__close(obj);
bpf_object__close(pkt_obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
index ccc7e8a34ab6..4e7f4b42ea29 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
@@ -6,7 +6,7 @@
#include <time.h>
#include <sys/mman.h>
#include <sys/syscall.h>
-#include "fexit_sleep.skel.h"
+#include "fexit_sleep.lskel.h"
static int do_sleep(void *skel)
{
@@ -58,8 +58,8 @@ void test_fexit_sleep(void)
* waiting for percpu_ref_kill to confirm). The other one
* will be freed quickly.
*/
- close(bpf_program__fd(fexit_skel->progs.nanosleep_fentry));
- close(bpf_program__fd(fexit_skel->progs.nanosleep_fexit));
+ close(fexit_skel->progs.nanosleep_fentry.prog_fd);
+ close(fexit_skel->progs.nanosleep_fexit.prog_fd);
fexit_sleep__detach(fexit_skel);
/* kill the thread to unwind sys_nanosleep stack through the trampoline */
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index 6792e41f7f69..af3dba726701 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -1,13 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "fexit_test.skel.h"
+#include "fexit_test.lskel.h"
static int fexit_test(struct fexit_test *fexit_skel)
{
int err, prog_fd, i;
__u32 duration = 0, retval;
- struct bpf_link *link;
+ int link_fd;
__u64 *result;
err = fexit_test__attach(fexit_skel);
@@ -15,11 +15,11 @@ static int fexit_test(struct fexit_test *fexit_skel)
return err;
/* Check that already linked program can't be attached again. */
- link = bpf_program__attach(fexit_skel->progs.test1);
- if (!ASSERT_ERR_PTR(link, "fexit_attach_link"))
+ link_fd = fexit_test__test1__attach(fexit_skel);
+ if (!ASSERT_LT(link_fd, 0, "fexit_attach_link"))
return -1;
- prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+ prog_fd = fexit_skel->progs.test1.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
ASSERT_OK(err, "test_run");
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index cd6dc80edf18..225714f71ac6 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -541,7 +541,7 @@ static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
return;
link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
- if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_netns"))
goto out_close;
run_tests_skb_less(tap_fd, skel->maps.last_dissection);
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
index 172c586b6996..3931ede5c534 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
@@ -134,9 +134,9 @@ static void test_link_create_link_create(int netns, int prog1, int prog2)
/* Expect failure creating link when another link exists */
errno = 0;
link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
- if (CHECK_FAIL(link2 != -1 || errno != E2BIG))
+ if (CHECK_FAIL(link2 >= 0 || errno != E2BIG))
perror("bpf_prog_attach(prog2) expected E2BIG");
- if (link2 != -1)
+ if (link2 >= 0)
close(link2);
CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
@@ -159,9 +159,9 @@ static void test_prog_attach_link_create(int netns, int prog1, int prog2)
/* Expect failure creating link when prog attached */
errno = 0;
link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
- if (CHECK_FAIL(link != -1 || errno != EEXIST))
+ if (CHECK_FAIL(link >= 0 || errno != EEXIST))
perror("bpf_link_create(prog2) expected EEXIST");
- if (link != -1)
+ if (link >= 0)
close(link);
CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
@@ -623,7 +623,7 @@ static void run_tests(int netns)
}
out_close:
for (i = 0; i < ARRAY_SIZE(progs); i++) {
- if (progs[i] != -1)
+ if (progs[i] >= 0)
CHECK_FAIL(close(progs[i]));
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
new file mode 100644
index 000000000000..02a465f36d59
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "get_func_ip_test.skel.h"
+
+void test_get_func_ip_test(void)
+{
+ struct get_func_ip_test *skel = NULL;
+ __u32 duration = 0, retval;
+ int err, prog_fd;
+
+ skel = get_func_ip_test__open();
+ if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open"))
+ return;
+
+ /* test6 is x86_64 specifc because of the instruction
+ * offset, disabling it for all other archs
+ */
+#ifndef __x86_64__
+ bpf_program__set_autoload(skel->progs.test6, false);
+ bpf_program__set_autoload(skel->progs.test7, false);
+#endif
+
+ err = get_func_ip_test__load(skel);
+ if (!ASSERT_OK(err, "get_func_ip_test__load"))
+ goto cleanup;
+
+ err = get_func_ip_test__attach(skel);
+ if (!ASSERT_OK(err, "get_func_ip_test__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+ NULL, NULL, &retval, &duration);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(retval, 0, "test_run");
+
+ prog_fd = bpf_program__fd(skel->progs.test5);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+ NULL, NULL, &retval, &duration);
+
+ ASSERT_OK(err, "test_run");
+
+ ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
+ ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
+ ASSERT_EQ(skel->bss->test3_result, 1, "test3_result");
+ ASSERT_EQ(skel->bss->test4_result, 1, "test4_result");
+ ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
+#ifdef __x86_64__
+ ASSERT_EQ(skel->bss->test6_result, 1, "test6_result");
+ ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
+#endif
+
+cleanup:
+ get_func_ip_test__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index 925722217edf..522237aa4470 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -121,12 +121,12 @@ void test_get_stack_raw_tp(void)
goto close_prog;
link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
pb_opts.sample_cb = get_stack_print_output;
pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto close_prog;
/* trigger some syscall action */
@@ -141,9 +141,7 @@ void test_get_stack_raw_tp(void)
}
close_prog:
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
- if (!IS_ERR_OR_NULL(pb))
- perf_buffer__free(pb);
+ bpf_link__destroy(link);
+ perf_buffer__free(pb);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
index d884b2ed5bc5..8d5a6023a1bb 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
@@ -48,8 +48,7 @@ void test_get_stackid_cannot_attach(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_no_callchain",
- "should have failed\n");
+ ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_no_callchain");
close(pmu_fd);
/* add PERF_SAMPLE_CALLCHAIN, attach should succeed */
@@ -65,8 +64,7 @@ void test_get_stackid_cannot_attach(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event_callchain",
- "err: %ld\n", PTR_ERR(skel->links.oncpu));
+ ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event_callchain");
close(pmu_fd);
/* add exclude_callchain_kernel, attach should fail */
@@ -82,8 +80,7 @@ void test_get_stackid_cannot_attach(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_exclude_callchain_kernel",
- "should have failed\n");
+ ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_exclude_callchain_kernel");
close(pmu_fd);
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c
index 428d488830c6..4747ab18f97f 100644
--- a/tools/testing/selftests/bpf/prog_tests/hashmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c
@@ -48,8 +48,7 @@ static void test_hashmap_generic(void)
struct hashmap *map;
map = hashmap__new(hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "hashmap__new",
- "failed to create map: %ld\n", PTR_ERR(map)))
+ if (!ASSERT_OK_PTR(map, "hashmap__new"))
return;
for (i = 0; i < ELEM_CNT; i++) {
@@ -267,8 +266,7 @@ static void test_hashmap_multimap(void)
/* force collisions */
map = hashmap__new(collision_hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "hashmap__new",
- "failed to create map: %ld\n", PTR_ERR(map)))
+ if (!ASSERT_OK_PTR(map, "hashmap__new"))
return;
/* set up multimap:
@@ -339,8 +337,7 @@ static void test_hashmap_empty()
/* force collisions */
map = hashmap__new(hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "hashmap__new",
- "failed to create map: %ld\n", PTR_ERR(map)))
+ if (!ASSERT_OK_PTR(map, "hashmap__new"))
goto cleanup;
if (CHECK(hashmap__size(map) != 0, "hashmap__size",
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index d65107919998..ddfb6bf97152 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -97,15 +97,13 @@ void test_kfree_skb(void)
goto close_prog;
link = bpf_program__attach_raw_tracepoint(prog, NULL);
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
link_fentry = bpf_program__attach_trace(fentry);
- if (CHECK(IS_ERR(link_fentry), "attach fentry", "err %ld\n",
- PTR_ERR(link_fentry)))
+ if (!ASSERT_OK_PTR(link_fentry, "attach fentry"))
goto close_prog;
link_fexit = bpf_program__attach_trace(fexit);
- if (CHECK(IS_ERR(link_fexit), "attach fexit", "err %ld\n",
- PTR_ERR(link_fexit)))
+ if (!ASSERT_OK_PTR(link_fexit, "attach fexit"))
goto close_prog;
perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map");
@@ -116,7 +114,7 @@ void test_kfree_skb(void)
pb_opts.sample_cb = on_sample;
pb_opts.ctx = &passed;
pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto close_prog;
memcpy(skb.cb, &cb, sizeof(cb));
@@ -144,12 +142,9 @@ void test_kfree_skb(void)
CHECK_FAIL(!test_ok[0] || !test_ok[1]);
close_prog:
perf_buffer__free(pb);
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
- if (!IS_ERR_OR_NULL(link_fentry))
- bpf_link__destroy(link_fentry);
- if (!IS_ERR_OR_NULL(link_fexit))
- bpf_link__destroy(link_fexit);
+ bpf_link__destroy(link);
+ bpf_link__destroy(link_fentry);
+ bpf_link__destroy(link_fexit);
bpf_object__close(obj);
bpf_object__close(obj2);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 7fc0951ee75f..9611f2bc50df 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2021 Facebook */
#include <test_progs.h>
#include <network_helpers.h>
-#include "kfunc_call_test.skel.h"
+#include "kfunc_call_test.lskel.h"
#include "kfunc_call_test_subprog.skel.h"
static void test_main(void)
@@ -14,13 +14,13 @@ static void test_main(void)
if (!ASSERT_OK_PTR(skel, "skel"))
return;
- prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
+ prog_fd = skel->progs.kfunc_call_test1.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, (__u32 *)&retval, NULL);
ASSERT_OK(err, "bpf_prog_test_run(test1)");
ASSERT_EQ(retval, 12, "test1-retval");
- prog_fd = bpf_program__fd(skel->progs.kfunc_call_test2);
+ prog_fd = skel->progs.kfunc_call_test2.prog_fd;
err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
NULL, NULL, (__u32 *)&retval, NULL);
ASSERT_OK(err, "bpf_prog_test_run(test2)");
@@ -44,7 +44,7 @@ static void test_subprog(void)
ASSERT_OK(err, "bpf_prog_test_run(test1)");
ASSERT_EQ(retval, 10, "test1-retval");
ASSERT_NEQ(skel->data->active_res, -1, "active_res");
- ASSERT_EQ(skel->data->sk_state, BPF_TCP_CLOSE, "sk_state");
+ ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res");
kfunc_call_test_subprog__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index b58b775d19f3..cf3acfa5a91d 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -6,6 +6,7 @@
#include <bpf/btf.h>
#include "test_ksyms_btf.skel.h"
#include "test_ksyms_btf_null_check.skel.h"
+#include "test_ksyms_weak.skel.h"
static int duration;
@@ -81,14 +82,40 @@ static void test_null_check(void)
test_ksyms_btf_null_check__destroy(skel);
}
+static void test_weak_syms(void)
+{
+ struct test_ksyms_weak *skel;
+ struct test_ksyms_weak__data *data;
+ int err;
+
+ skel = test_ksyms_weak__open_and_load();
+ if (CHECK(!skel, "test_ksyms_weak__open_and_load", "failed\n"))
+ return;
+
+ err = test_ksyms_weak__attach(skel);
+ if (CHECK(err, "test_ksyms_weak__attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ data = skel->data;
+ ASSERT_EQ(data->out__existing_typed, 0, "existing typed ksym");
+ ASSERT_NEQ(data->out__existing_typeless, -1, "existing typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typeless, 0, "nonexistent typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typed, 0, "nonexistent typed ksym");
+
+cleanup:
+ test_ksyms_weak__destroy(skel);
+}
+
void test_ksyms_btf(void)
{
int percpu_datasec;
struct btf *btf;
btf = libbpf_find_kernel_btf();
- if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
- PTR_ERR(btf)))
+ if (!ASSERT_OK_PTR(btf, "btf_exists"))
return;
percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
@@ -106,4 +133,7 @@ void test_ksyms_btf(void)
if (test__start_subtest("null_check"))
test_null_check();
+
+ if (test__start_subtest("weak_ksyms"))
+ test_weak_syms();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
index 4c232b456479..2cd5cded543f 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
@@ -4,7 +4,7 @@
#include <test_progs.h>
#include <bpf/libbpf.h>
#include <bpf/btf.h>
-#include "test_ksyms_module.skel.h"
+#include "test_ksyms_module.lskel.h"
static int duration;
diff --git a/tools/testing/selftests/bpf/prog_tests/link_pinning.c b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
index a743288cf384..6fc97c45f71e 100644
--- a/tools/testing/selftests/bpf/prog_tests/link_pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
@@ -17,7 +17,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
int err, i;
link = bpf_program__attach(prog);
- if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto cleanup;
bss->in = 1;
@@ -51,7 +51,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
/* re-open link from BPFFS */
link = bpf_link__open(link_pin_path);
- if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_open"))
goto cleanup;
CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2",
@@ -84,8 +84,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i);
cleanup:
- if (!IS_ERR(link))
- bpf_link__destroy(link);
+ bpf_link__destroy(link);
}
void test_link_pinning(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
new file mode 100644
index 000000000000..beebfa9730e1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+#include "test_lookup_and_delete.skel.h"
+
+#define START_VALUE 1234
+#define NEW_VALUE 4321
+#define MAX_ENTRIES 2
+
+static int duration;
+static int nr_cpus;
+
+static int fill_values(int map_fd)
+{
+ __u64 key, value = START_VALUE;
+ int err;
+
+ for (key = 1; key < MAX_ENTRIES + 1; key++) {
+ err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int fill_values_percpu(int map_fd)
+{
+ __u64 key, value[nr_cpus];
+ int i, err;
+
+ for (i = 0; i < nr_cpus; i++)
+ value[i] = START_VALUE;
+
+ for (key = 1; key < MAX_ENTRIES + 1; key++) {
+ err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct test_lookup_and_delete *setup_prog(enum bpf_map_type map_type,
+ int *map_fd)
+{
+ struct test_lookup_and_delete *skel;
+ int err;
+
+ skel = test_lookup_and_delete__open();
+ if (!ASSERT_OK_PTR(skel, "test_lookup_and_delete__open"))
+ return NULL;
+
+ err = bpf_map__set_type(skel->maps.hash_map, map_type);
+ if (!ASSERT_OK(err, "bpf_map__set_type"))
+ goto cleanup;
+
+ err = bpf_map__set_max_entries(skel->maps.hash_map, MAX_ENTRIES);
+ if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+ goto cleanup;
+
+ err = test_lookup_and_delete__load(skel);
+ if (!ASSERT_OK(err, "test_lookup_and_delete__load"))
+ goto cleanup;
+
+ *map_fd = bpf_map__fd(skel->maps.hash_map);
+ if (!ASSERT_GE(*map_fd, 0, "bpf_map__fd"))
+ goto cleanup;
+
+ return skel;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+ return NULL;
+}
+
+/* Triggers BPF program that updates map with given key and value */
+static int trigger_tp(struct test_lookup_and_delete *skel, __u64 key,
+ __u64 value)
+{
+ int err;
+
+ skel->bss->set_pid = getpid();
+ skel->bss->set_key = key;
+ skel->bss->set_value = value;
+
+ err = test_lookup_and_delete__attach(skel);
+ if (!ASSERT_OK(err, "test_lookup_and_delete__attach"))
+ return -1;
+
+ syscall(__NR_getpgid);
+
+ test_lookup_and_delete__detach(skel);
+
+ return 0;
+}
+
+static void test_lookup_and_delete_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, value;
+ int map_fd, err;
+
+ /* Setup program and fill the map. */
+ skel = setup_prog(BPF_MAP_TYPE_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values(map_fd);
+ if (!ASSERT_OK(err, "fill_values"))
+ goto cleanup;
+
+ /* Lookup and delete element. */
+ key = 1;
+ err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ /* Fetched value should match the initially set value. */
+ if (CHECK(value != START_VALUE, "bpf_map_lookup_and_delete_elem",
+ "unexpected value=%lld\n", value))
+ goto cleanup;
+
+ /* Check that the entry is non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_percpu_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, val, value[nr_cpus];
+ int map_fd, err, i;
+
+ /* Setup program and fill the map. */
+ skel = setup_prog(BPF_MAP_TYPE_PERCPU_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values_percpu(map_fd);
+ if (!ASSERT_OK(err, "fill_values_percpu"))
+ goto cleanup;
+
+ /* Lookup and delete element. */
+ key = 1;
+ err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ for (i = 0; i < nr_cpus; i++) {
+ val = value[i];
+
+ /* Fetched value should match the initially set value. */
+ if (CHECK(val != START_VALUE, "map value",
+ "unexpected for cpu %d: %lld\n", i, val))
+ goto cleanup;
+ }
+
+ /* Check that the entry is non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, value;
+ int map_fd, err;
+
+ /* Setup program and fill the LRU map. */
+ skel = setup_prog(BPF_MAP_TYPE_LRU_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values(map_fd);
+ if (!ASSERT_OK(err, "fill_values"))
+ goto cleanup;
+
+ /* Insert new element at key=3, should reuse LRU element. */
+ key = 3;
+ err = trigger_tp(skel, key, NEW_VALUE);
+ if (!ASSERT_OK(err, "trigger_tp"))
+ goto cleanup;
+
+ /* Lookup and delete element 3. */
+ err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ /* Value should match the new value. */
+ if (CHECK(value != NEW_VALUE, "bpf_map_lookup_and_delete_elem",
+ "unexpected value=%lld\n", value))
+ goto cleanup;
+
+ /* Check that entries 3 and 1 are non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ key = 1;
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_percpu_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, val, value[nr_cpus];
+ int map_fd, err, i, cpucnt = 0;
+
+ /* Setup program and fill the LRU map. */
+ skel = setup_prog(BPF_MAP_TYPE_LRU_PERCPU_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values_percpu(map_fd);
+ if (!ASSERT_OK(err, "fill_values_percpu"))
+ goto cleanup;
+
+ /* Insert new element at key=3, should reuse LRU element 1. */
+ key = 3;
+ err = trigger_tp(skel, key, NEW_VALUE);
+ if (!ASSERT_OK(err, "trigger_tp"))
+ goto cleanup;
+
+ /* Clean value. */
+ for (i = 0; i < nr_cpus; i++)
+ value[i] = 0;
+
+ /* Lookup and delete element 3. */
+ err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) {
+ goto cleanup;
+ }
+
+ /* Check if only one CPU has set the value. */
+ for (i = 0; i < nr_cpus; i++) {
+ val = value[i];
+ if (val) {
+ if (CHECK(val != NEW_VALUE, "map value",
+ "unexpected for cpu %d: %lld\n", i, val))
+ goto cleanup;
+ cpucnt++;
+ }
+ }
+ if (CHECK(cpucnt != 1, "map value", "set for %d CPUs instead of 1!\n",
+ cpucnt))
+ goto cleanup;
+
+ /* Check that entries 3 and 1 are non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ key = 1;
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+void test_lookup_and_delete(void)
+{
+ nr_cpus = bpf_num_possible_cpus();
+
+ if (test__start_subtest("lookup_and_delete"))
+ test_lookup_and_delete_hash();
+ if (test__start_subtest("lookup_and_delete_percpu"))
+ test_lookup_and_delete_percpu_hash();
+ if (test__start_subtest("lookup_and_delete_lru"))
+ test_lookup_and_delete_lru_hash();
+ if (test__start_subtest("lookup_and_delete_lru_percpu"))
+ test_lookup_and_delete_lru_percpu_hash();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
new file mode 100644
index 000000000000..59adb4715394
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ * 1. call listen() for 4 server sockets.
+ * 2. call connect() for 25 client sockets.
+ * 3. call listen() for 1 server socket. (migration target)
+ * 4. update a map to migrate all child sockets
+ * to the last server socket (migrate_map[cookie] = 4)
+ * 5. call shutdown() for first 4 server sockets
+ * and migrate the requests in the accept queue
+ * to the last server socket.
+ * 6. call listen() for the second server socket.
+ * 7. call shutdown() for the last server
+ * and migrate the requests in the accept queue
+ * to the second server socket.
+ * 8. call listen() for the last server.
+ * 9. call shutdown() for the second server
+ * and migrate the requests in the accept queue
+ * to the last server socket.
+ * 10. call accept() for the last server socket.
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "test_progs.h"
+#include "test_migrate_reuseport.skel.h"
+#include "network_helpers.h"
+
+#ifndef TCP_FASTOPEN_CONNECT
+#define TCP_FASTOPEN_CONNECT 30
+#endif
+
+#define IFINDEX_LO 1
+
+#define NR_SERVERS 5
+#define NR_CLIENTS (NR_SERVERS * 5)
+#define MIGRATED_TO (NR_SERVERS - 1)
+
+/* fastopenq->max_qlen and sk->sk_max_ack_backlog */
+#define QLEN (NR_CLIENTS * 5)
+
+#define MSG "Hello World\0"
+#define MSGLEN 12
+
+static struct migrate_reuseport_test_case {
+ const char *name;
+ __s64 servers[NR_SERVERS];
+ __s64 clients[NR_CLIENTS];
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+ int family;
+ int state;
+ bool drop_ack;
+ bool expire_synack_timer;
+ bool fastopen;
+ struct bpf_link *link;
+} test_cases[] = {
+ {
+ .name = "IPv4 TCP_ESTABLISHED inet_csk_listen_stop",
+ .family = AF_INET,
+ .state = BPF_TCP_ESTABLISHED,
+ .drop_ack = false,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv4 TCP_SYN_RECV inet_csk_listen_stop",
+ .family = AF_INET,
+ .state = BPF_TCP_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = true,
+ },
+ {
+ .name = "IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler",
+ .family = AF_INET,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = true,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv4 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+ .family = AF_INET,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv6 TCP_ESTABLISHED inet_csk_listen_stop",
+ .family = AF_INET6,
+ .state = BPF_TCP_ESTABLISHED,
+ .drop_ack = false,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv6 TCP_SYN_RECV inet_csk_listen_stop",
+ .family = AF_INET6,
+ .state = BPF_TCP_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = true,
+ },
+ {
+ .name = "IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler",
+ .family = AF_INET6,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = true,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv6 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+ .family = AF_INET6,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ }
+};
+
+static void init_fds(__s64 fds[], int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ fds[i] = -1;
+}
+
+static void close_fds(__s64 fds[], int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++) {
+ if (fds[i] != -1) {
+ close(fds[i]);
+ fds[i] = -1;
+ }
+ }
+}
+
+static int setup_fastopen(char *buf, int size, int *saved_len, bool restore)
+{
+ int err = 0, fd, len;
+
+ fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
+ if (!ASSERT_NEQ(fd, -1, "open"))
+ return -1;
+
+ if (restore) {
+ len = write(fd, buf, *saved_len);
+ if (!ASSERT_EQ(len, *saved_len, "write - restore"))
+ err = -1;
+ } else {
+ *saved_len = read(fd, buf, size);
+ if (!ASSERT_GE(*saved_len, 1, "read")) {
+ err = -1;
+ goto close;
+ }
+
+ err = lseek(fd, 0, SEEK_SET);
+ if (!ASSERT_OK(err, "lseek"))
+ goto close;
+
+ /* (TFO_CLIENT_ENABLE | TFO_SERVER_ENABLE |
+ * TFO_CLIENT_NO_COOKIE | TFO_SERVER_COOKIE_NOT_REQD)
+ */
+ len = write(fd, "519", 3);
+ if (!ASSERT_EQ(len, 3, "write - setup"))
+ err = -1;
+ }
+
+close:
+ close(fd);
+
+ return err;
+}
+
+static int drop_ack(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ if (test_case->family == AF_INET)
+ skel->bss->server_port = ((struct sockaddr_in *)
+ &test_case->addr)->sin_port;
+ else
+ skel->bss->server_port = ((struct sockaddr_in6 *)
+ &test_case->addr)->sin6_port;
+
+ test_case->link = bpf_program__attach_xdp(skel->progs.drop_ack,
+ IFINDEX_LO);
+ if (!ASSERT_OK_PTR(test_case->link, "bpf_program__attach_xdp"))
+ return -1;
+
+ return 0;
+}
+
+static int pass_ack(struct migrate_reuseport_test_case *test_case)
+{
+ int err;
+
+ err = bpf_link__detach(test_case->link);
+ if (!ASSERT_OK(err, "bpf_link__detach"))
+ return -1;
+
+ test_case->link = NULL;
+
+ return 0;
+}
+
+static int start_servers(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ int i, err, prog_fd, reuseport = 1, qlen = QLEN;
+
+ prog_fd = bpf_program__fd(skel->progs.migrate_reuseport);
+
+ make_sockaddr(test_case->family,
+ test_case->family == AF_INET ? "127.0.0.1" : "::1", 0,
+ &test_case->addr, &test_case->addrlen);
+
+ for (i = 0; i < NR_SERVERS; i++) {
+ test_case->servers[i] = socket(test_case->family, SOCK_STREAM,
+ IPPROTO_TCP);
+ if (!ASSERT_NEQ(test_case->servers[i], -1, "socket"))
+ return -1;
+
+ err = setsockopt(test_case->servers[i], SOL_SOCKET,
+ SO_REUSEPORT, &reuseport, sizeof(reuseport));
+ if (!ASSERT_OK(err, "setsockopt - SO_REUSEPORT"))
+ return -1;
+
+ err = bind(test_case->servers[i],
+ (struct sockaddr *)&test_case->addr,
+ test_case->addrlen);
+ if (!ASSERT_OK(err, "bind"))
+ return -1;
+
+ if (i == 0) {
+ err = setsockopt(test_case->servers[i], SOL_SOCKET,
+ SO_ATTACH_REUSEPORT_EBPF,
+ &prog_fd, sizeof(prog_fd));
+ if (!ASSERT_OK(err,
+ "setsockopt - SO_ATTACH_REUSEPORT_EBPF"))
+ return -1;
+
+ err = getsockname(test_case->servers[i],
+ (struct sockaddr *)&test_case->addr,
+ &test_case->addrlen);
+ if (!ASSERT_OK(err, "getsockname"))
+ return -1;
+ }
+
+ if (test_case->fastopen) {
+ err = setsockopt(test_case->servers[i],
+ SOL_TCP, TCP_FASTOPEN,
+ &qlen, sizeof(qlen));
+ if (!ASSERT_OK(err, "setsockopt - TCP_FASTOPEN"))
+ return -1;
+ }
+
+ /* All requests will be tied to the first four listeners */
+ if (i != MIGRATED_TO) {
+ err = listen(test_case->servers[i], qlen);
+ if (!ASSERT_OK(err, "listen"))
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int start_clients(struct migrate_reuseport_test_case *test_case)
+{
+ char buf[MSGLEN] = MSG;
+ int i, err;
+
+ for (i = 0; i < NR_CLIENTS; i++) {
+ test_case->clients[i] = socket(test_case->family, SOCK_STREAM,
+ IPPROTO_TCP);
+ if (!ASSERT_NEQ(test_case->clients[i], -1, "socket"))
+ return -1;
+
+ /* The attached XDP program drops only the final ACK, so
+ * clients will transition to TCP_ESTABLISHED immediately.
+ */
+ err = settimeo(test_case->clients[i], 100);
+ if (!ASSERT_OK(err, "settimeo"))
+ return -1;
+
+ if (test_case->fastopen) {
+ int fastopen = 1;
+
+ err = setsockopt(test_case->clients[i], IPPROTO_TCP,
+ TCP_FASTOPEN_CONNECT, &fastopen,
+ sizeof(fastopen));
+ if (!ASSERT_OK(err,
+ "setsockopt - TCP_FASTOPEN_CONNECT"))
+ return -1;
+ }
+
+ err = connect(test_case->clients[i],
+ (struct sockaddr *)&test_case->addr,
+ test_case->addrlen);
+ if (!ASSERT_OK(err, "connect"))
+ return -1;
+
+ err = write(test_case->clients[i], buf, MSGLEN);
+ if (!ASSERT_EQ(err, MSGLEN, "write"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int update_maps(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ int i, err, migrated_to = MIGRATED_TO;
+ int reuseport_map_fd, migrate_map_fd;
+ __u64 value;
+
+ reuseport_map_fd = bpf_map__fd(skel->maps.reuseport_map);
+ migrate_map_fd = bpf_map__fd(skel->maps.migrate_map);
+
+ for (i = 0; i < NR_SERVERS; i++) {
+ value = (__u64)test_case->servers[i];
+ err = bpf_map_update_elem(reuseport_map_fd, &i, &value,
+ BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem - reuseport_map"))
+ return -1;
+
+ err = bpf_map_lookup_elem(reuseport_map_fd, &i, &value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem - reuseport_map"))
+ return -1;
+
+ err = bpf_map_update_elem(migrate_map_fd, &value, &migrated_to,
+ BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem - migrate_map"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int migrate_dance(struct migrate_reuseport_test_case *test_case)
+{
+ int i, err;
+
+ /* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests
+ * to the last listener based on eBPF.
+ */
+ for (i = 0; i < MIGRATED_TO; i++) {
+ err = shutdown(test_case->servers[i], SHUT_RDWR);
+ if (!ASSERT_OK(err, "shutdown"))
+ return -1;
+ }
+
+ /* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */
+ if (test_case->state == BPF_TCP_NEW_SYN_RECV)
+ return 0;
+
+ /* Note that we use the second listener instead of the
+ * first one here.
+ *
+ * The fist listener is bind()ed with port 0 and,
+ * SOCK_BINDPORT_LOCK is not set to sk_userlocks, so
+ * calling listen() again will bind() the first listener
+ * on a new ephemeral port and detach it from the existing
+ * reuseport group. (See: __inet_bind(), tcp_set_state())
+ *
+ * OTOH, the second one is bind()ed with a specific port,
+ * and SOCK_BINDPORT_LOCK is set. Thus, re-listen() will
+ * resurrect the listener on the existing reuseport group.
+ */
+ err = listen(test_case->servers[1], QLEN);
+ if (!ASSERT_OK(err, "listen"))
+ return -1;
+
+ /* Migrate from the last listener to the second one.
+ *
+ * All listeners were detached out of the reuseport_map,
+ * so migration will be done by kernel random pick from here.
+ */
+ err = shutdown(test_case->servers[MIGRATED_TO], SHUT_RDWR);
+ if (!ASSERT_OK(err, "shutdown"))
+ return -1;
+
+ /* Back to the existing reuseport group */
+ err = listen(test_case->servers[MIGRATED_TO], QLEN);
+ if (!ASSERT_OK(err, "listen"))
+ return -1;
+
+ /* Migrate back to the last one from the second one */
+ err = shutdown(test_case->servers[1], SHUT_RDWR);
+ if (!ASSERT_OK(err, "shutdown"))
+ return -1;
+
+ return 0;
+}
+
+static void count_requests(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int err, cnt = 0, client;
+ char buf[MSGLEN];
+
+ err = settimeo(test_case->servers[MIGRATED_TO], 4000);
+ if (!ASSERT_OK(err, "settimeo"))
+ goto out;
+
+ for (; cnt < NR_CLIENTS; cnt++) {
+ client = accept(test_case->servers[MIGRATED_TO],
+ (struct sockaddr *)&addr, &len);
+ if (!ASSERT_NEQ(client, -1, "accept"))
+ goto out;
+
+ memset(buf, 0, MSGLEN);
+ read(client, &buf, MSGLEN);
+ close(client);
+
+ if (!ASSERT_STREQ(buf, MSG, "read"))
+ goto out;
+ }
+
+out:
+ ASSERT_EQ(cnt, NR_CLIENTS, "count in userspace");
+
+ switch (test_case->state) {
+ case BPF_TCP_ESTABLISHED:
+ cnt = skel->bss->migrated_at_close;
+ break;
+ case BPF_TCP_SYN_RECV:
+ cnt = skel->bss->migrated_at_close_fastopen;
+ break;
+ case BPF_TCP_NEW_SYN_RECV:
+ if (test_case->expire_synack_timer)
+ cnt = skel->bss->migrated_at_send_synack;
+ else
+ cnt = skel->bss->migrated_at_recv_ack;
+ break;
+ default:
+ cnt = 0;
+ }
+
+ ASSERT_EQ(cnt, NR_CLIENTS, "count in BPF prog");
+}
+
+static void run_test(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ int err, saved_len;
+ char buf[16];
+
+ skel->bss->migrated_at_close = 0;
+ skel->bss->migrated_at_close_fastopen = 0;
+ skel->bss->migrated_at_send_synack = 0;
+ skel->bss->migrated_at_recv_ack = 0;
+
+ init_fds(test_case->servers, NR_SERVERS);
+ init_fds(test_case->clients, NR_CLIENTS);
+
+ if (test_case->fastopen) {
+ memset(buf, 0, sizeof(buf));
+
+ err = setup_fastopen(buf, sizeof(buf), &saved_len, false);
+ if (!ASSERT_OK(err, "setup_fastopen - setup"))
+ return;
+ }
+
+ err = start_servers(test_case, skel);
+ if (!ASSERT_OK(err, "start_servers"))
+ goto close_servers;
+
+ if (test_case->drop_ack) {
+ /* Drop the final ACK of the 3-way handshake and stick the
+ * in-flight requests on TCP_SYN_RECV or TCP_NEW_SYN_RECV.
+ */
+ err = drop_ack(test_case, skel);
+ if (!ASSERT_OK(err, "drop_ack"))
+ goto close_servers;
+ }
+
+ /* Tie requests to the first four listners */
+ err = start_clients(test_case);
+ if (!ASSERT_OK(err, "start_clients"))
+ goto close_clients;
+
+ err = listen(test_case->servers[MIGRATED_TO], QLEN);
+ if (!ASSERT_OK(err, "listen"))
+ goto close_clients;
+
+ err = update_maps(test_case, skel);
+ if (!ASSERT_OK(err, "fill_maps"))
+ goto close_clients;
+
+ /* Migrate the requests in the accept queue only.
+ * TCP_NEW_SYN_RECV requests are not migrated at this point.
+ */
+ err = migrate_dance(test_case);
+ if (!ASSERT_OK(err, "migrate_dance"))
+ goto close_clients;
+
+ if (test_case->expire_synack_timer) {
+ /* Wait for SYN+ACK timers to expire so that
+ * reqsk_timer_handler() migrates TCP_NEW_SYN_RECV requests.
+ */
+ sleep(1);
+ }
+
+ if (test_case->link) {
+ /* Resume 3WHS and migrate TCP_NEW_SYN_RECV requests */
+ err = pass_ack(test_case);
+ if (!ASSERT_OK(err, "pass_ack"))
+ goto close_clients;
+ }
+
+ count_requests(test_case, skel);
+
+close_clients:
+ close_fds(test_case->clients, NR_CLIENTS);
+
+ if (test_case->link) {
+ err = pass_ack(test_case);
+ ASSERT_OK(err, "pass_ack - clean up");
+ }
+
+close_servers:
+ close_fds(test_case->servers, NR_SERVERS);
+
+ if (test_case->fastopen) {
+ err = setup_fastopen(buf, sizeof(buf), &saved_len, true);
+ ASSERT_OK(err, "setup_fastopen - restore");
+ }
+}
+
+void test_migrate_reuseport(void)
+{
+ struct test_migrate_reuseport *skel;
+ int i;
+
+ skel = test_migrate_reuseport__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ test__start_subtest(test_cases[i].name);
+ run_test(&test_cases[i], skel);
+ }
+
+ test_migrate_reuseport__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c
new file mode 100644
index 000000000000..6ede48bde91b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/sysinfo.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "netcnt_prog.skel.h"
+#include "netcnt_common.h"
+
+#define CG_NAME "/netcnt"
+
+void test_netcnt(void)
+{
+ union percpu_net_cnt *percpu_netcnt = NULL;
+ struct bpf_cgroup_storage_key key;
+ int map_fd, percpu_map_fd;
+ struct netcnt_prog *skel;
+ unsigned long packets;
+ union net_cnt netcnt;
+ unsigned long bytes;
+ int cpu, nproc;
+ int cg_fd = -1;
+ char cmd[128];
+
+ skel = netcnt_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load"))
+ return;
+
+ nproc = get_nprocs_conf();
+ percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
+ if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)"))
+ goto err;
+
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup"))
+ goto err;
+
+ skel->links.bpf_nextcnt = bpf_program__attach_cgroup(skel->progs.bpf_nextcnt, cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.bpf_nextcnt,
+ "attach_cgroup(bpf_nextcnt)"))
+ goto err;
+
+ snprintf(cmd, sizeof(cmd), "%s ::1 -A -c 10000 -q > /dev/null", ping_command(AF_INET6));
+ ASSERT_OK(system(cmd), cmd);
+
+ map_fd = bpf_map__fd(skel->maps.netcnt);
+ if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, &key), "bpf_map_get_next_key"))
+ goto err;
+
+ if (!ASSERT_OK(bpf_map_lookup_elem(map_fd, &key, &netcnt), "bpf_map_lookup_elem(netcnt)"))
+ goto err;
+
+ percpu_map_fd = bpf_map__fd(skel->maps.percpu_netcnt);
+ if (!ASSERT_OK(bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0]),
+ "bpf_map_lookup_elem(percpu_netcnt)"))
+ goto err;
+
+ /* Some packets can be still in per-cpu cache, but not more than
+ * MAX_PERCPU_PACKETS.
+ */
+ packets = netcnt.packets;
+ bytes = netcnt.bytes;
+ for (cpu = 0; cpu < nproc; cpu++) {
+ ASSERT_LE(percpu_netcnt[cpu].packets, MAX_PERCPU_PACKETS, "MAX_PERCPU_PACKETS");
+
+ packets += percpu_netcnt[cpu].packets;
+ bytes += percpu_netcnt[cpu].bytes;
+ }
+
+ /* No packets should be lost */
+ ASSERT_EQ(packets, 10000, "packets");
+
+ /* Let's check that bytes counter matches the number of packets
+ * multiplied by the size of ipv6 ICMP packet.
+ */
+ ASSERT_EQ(bytes, packets * 104, "bytes");
+
+err:
+ if (cg_fd != -1)
+ close(cg_fd);
+ free(percpu_netcnt);
+ netcnt_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
new file mode 100644
index 000000000000..71d8f3ba7d6b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "netns_cookie_prog.skel.h"
+#include "network_helpers.h"
+
+#ifndef SO_NETNS_COOKIE
+#define SO_NETNS_COOKIE 71
+#endif
+
+static int duration;
+
+void test_netns_cookie(void)
+{
+ int server_fd = -1, client_fd = -1, cgroup_fd = -1;
+ int err, val, ret, map, verdict;
+ struct netns_cookie_prog *skel;
+ uint64_t cookie_expected_value;
+ socklen_t vallen = sizeof(cookie_expected_value);
+ static const char send_msg[] = "message";
+
+ skel = netns_cookie_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ cgroup_fd = test__join_cgroup("/netns_cookie");
+ if (CHECK(cgroup_fd < 0, "join_cgroup", "cgroup creation failed\n"))
+ goto done;
+
+ skel->links.get_netns_cookie_sockops = bpf_program__attach_cgroup(
+ skel->progs.get_netns_cookie_sockops, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach"))
+ goto done;
+
+ verdict = bpf_program__fd(skel->progs.get_netns_cookie_sk_msg);
+ map = bpf_map__fd(skel->maps.sock_map);
+ err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "prog_attach"))
+ goto done;
+
+ server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno))
+ goto done;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (CHECK(client_fd < 0, "connect_to_fd", "errno %d\n", errno))
+ goto done;
+
+ ret = send(client_fd, send_msg, sizeof(send_msg), 0);
+ if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n", ret))
+ goto done;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sockops_netns_cookies),
+ &client_fd, &val);
+ if (!ASSERT_OK(err, "map_lookup(sockops_netns_cookies)"))
+ goto done;
+
+ err = getsockopt(client_fd, SOL_SOCKET, SO_NETNS_COOKIE,
+ &cookie_expected_value, &vallen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto done;
+
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies),
+ &client_fd, &val);
+ if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)"))
+ goto done;
+
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+
+done:
+ if (server_fd != -1)
+ close(server_fd);
+ if (client_fd != -1)
+ close(client_fd);
+ if (cgroup_fd != -1)
+ close(cgroup_fd);
+ netns_cookie_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/obj_name.c b/tools/testing/selftests/bpf/prog_tests/obj_name.c
index e178416bddad..6194b776a28b 100644
--- a/tools/testing/selftests/bpf/prog_tests/obj_name.c
+++ b/tools/testing/selftests/bpf/prog_tests/obj_name.c
@@ -38,13 +38,13 @@ void test_obj_name(void)
fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
CHECK((tests[i].success && fd < 0) ||
- (!tests[i].success && fd != -1) ||
+ (!tests[i].success && fd >= 0) ||
(!tests[i].success && errno != tests[i].expected_errno),
"check-bpf-prog-name",
"fd %d(%d) errno %d(%d)\n",
fd, tests[i].success, errno, tests[i].expected_errno);
- if (fd != -1)
+ if (fd >= 0)
close(fd);
/* test different attr.map_name during BPF_MAP_CREATE */
@@ -59,13 +59,13 @@ void test_obj_name(void)
memcpy(attr.map_name, tests[i].name, ncopy);
fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
CHECK((tests[i].success && fd < 0) ||
- (!tests[i].success && fd != -1) ||
+ (!tests[i].success && fd >= 0) ||
(!tests[i].success && errno != tests[i].expected_errno),
"check-bpf-map-name",
"fd %d(%d) errno %d(%d)\n",
fd, tests[i].success, errno, tests[i].expected_errno);
- if (fd != -1)
+ if (fd >= 0)
close(fd);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
index e35c444902a7..12c4f45cee1a 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -74,7 +74,7 @@ static void test_perf_branches_common(int perf_fd,
/* attach perf_event */
link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd);
- if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_perf_event"))
goto out_destroy_skel;
/* generate some branches on cpu 0 */
@@ -119,7 +119,7 @@ static void test_perf_branches_hw(void)
* Some setups don't support branch records (virtual machines, !x86),
* so skip test in this case.
*/
- if (pfd == -1) {
+ if (pfd < 0) {
if (errno == ENOENT || errno == EOPNOTSUPP) {
printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
__func__);
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index ca9f0895ec84..6490e9673002 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -80,7 +80,7 @@ void test_perf_buffer(void)
pb_opts.sample_cb = on_sample;
pb_opts.ctx = &cpu_seen;
pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto out_close;
CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd",
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
index 72c3690844fb..33144c9432ae 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
@@ -97,8 +97,7 @@ void test_perf_event_stackmap(void)
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
- "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+ if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
close(pmu_fd);
goto cleanup;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c
new file mode 100644
index 000000000000..b1abd0c46607
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <test_progs.h>
+#include "test_perf_link.skel.h"
+
+static void burn_cpu(void)
+{
+ volatile int j = 0;
+ cpu_set_t cpu_set;
+ int i, err;
+
+ /* generate some branches on cpu 0 */
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ ASSERT_OK(err, "set_thread_affinity");
+
+ /* spin the loop for a while (random high number) */
+ for (i = 0; i < 1000000; ++i)
+ ++j;
+}
+
+void test_perf_link(void)
+{
+ struct test_perf_link *skel = NULL;
+ struct perf_event_attr attr;
+ int pfd = -1, link_fd = -1, err;
+ int run_cnt_before, run_cnt_after;
+ struct bpf_link_info info;
+ __u32 info_len = sizeof(info);
+
+ /* create perf event */
+ memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 4000;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (!ASSERT_GE(pfd, 0, "perf_fd"))
+ goto cleanup;
+
+ skel = test_perf_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ link_fd = bpf_link_create(bpf_program__fd(skel->progs.handler), pfd,
+ BPF_PERF_EVENT, NULL);
+ if (!ASSERT_GE(link_fd, 0, "link_fd"))
+ goto cleanup;
+
+ memset(&info, 0, sizeof(info));
+ err = bpf_obj_get_info_by_fd(link_fd, &info, &info_len);
+ if (!ASSERT_OK(err, "link_get_info"))
+ goto cleanup;
+
+ ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "link_type");
+ ASSERT_GT(info.id, 0, "link_id");
+ ASSERT_GT(info.prog_id, 0, "link_prog_id");
+
+ /* ensure we get at least one perf_event prog execution */
+ burn_cpu();
+ ASSERT_GT(skel->bss->run_cnt, 0, "run_cnt");
+
+ /* perf_event is still active, but we close link and BPF program
+ * shouldn't be executed anymore
+ */
+ close(link_fd);
+ link_fd = -1;
+
+ /* make sure there are no stragglers */
+ kern_sync_rcu();
+
+ run_cnt_before = skel->bss->run_cnt;
+ burn_cpu();
+ run_cnt_after = skel->bss->run_cnt;
+
+ ASSERT_EQ(run_cnt_before, run_cnt_after, "run_cnt_before_after");
+
+cleanup:
+ if (link_fd >= 0)
+ close(link_fd);
+ if (pfd >= 0)
+ close(pfd);
+ test_perf_link__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c
index fcf54b3a1dd0..d4b953ae3407 100644
--- a/tools/testing/selftests/bpf/prog_tests/pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/pinning.c
@@ -125,6 +125,10 @@ void test_pinning(void)
if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
goto out;
+ /* get pinning path */
+ if (!ASSERT_STREQ(bpf_map__pin_path(map), pinpath, "get pin path"))
+ goto out;
+
/* set pinning path of other map and re-pin all */
map = bpf_object__find_map_by_name(obj, "nopinmap");
if (CHECK(!map, "find map", "NULL map"))
@@ -134,6 +138,11 @@ void test_pinning(void)
if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
goto out;
+ /* get pinning path after set */
+ if (!ASSERT_STREQ(bpf_map__pin_path(map), custpinpath,
+ "get pin path after set"))
+ goto out;
+
/* should only pin the one unpinned map */
err = bpf_object__pin_maps(obj, NULL);
if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
index 7aecfd9e87d1..95bd12097358 100644
--- a/tools/testing/selftests/bpf/prog_tests/probe_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
@@ -15,7 +15,7 @@ void test_probe_user(void)
static const int zero = 0;
obj = bpf_object__open_file(obj_file, &opts);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
return;
kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
@@ -33,11 +33,8 @@ void test_probe_user(void)
goto cleanup;
kprobe_link = bpf_program__attach(kprobe_prog);
- if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
- "err %ld\n", PTR_ERR(kprobe_link))) {
- kprobe_link = NULL;
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
goto cleanup;
- }
memset(&curr, 0, sizeof(curr));
in->sin_family = AF_INET;
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
index 131d7f7eeb42..89fc98faf19e 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
@@ -46,7 +46,7 @@ void test_prog_run_xattr(void)
tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run",
+ CHECK_ATTR(err >= 0 || errno != ENOSPC || tattr.retval, "run",
"err %d errno %d retval %d\n", err, errno, tattr.retval);
CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out",
@@ -78,6 +78,6 @@ void test_prog_run_xattr(void)
cleanup:
if (skel)
test_pkt_access__destroy(skel);
- if (stats_fd != -1)
+ if (stats_fd >= 0)
close(stats_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
index c5fb191874ac..41720a62c4fa 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
@@ -77,7 +77,7 @@ void test_raw_tp_test_run(void)
/* invalid cpu ID should fail with ENXIO */
opts.cpu = 0xffffffff;
err = bpf_prog_test_run_opts(prog_fd, &opts);
- CHECK(err != -1 || errno != ENXIO,
+ CHECK(err >= 0 || errno != ENXIO,
"test_run_opts_fail",
"should failed with ENXIO\n");
@@ -85,7 +85,7 @@ void test_raw_tp_test_run(void)
opts.cpu = 1;
opts.flags = 0;
err = bpf_prog_test_run_opts(prog_fd, &opts);
- CHECK(err != -1 || errno != EINVAL,
+ CHECK(err >= 0 || errno != EINVAL,
"test_run_opts_fail",
"should failed with EINVAL\n");
diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
index 563e12120e77..5f9eaa3ab584 100644
--- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
+++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
@@ -30,7 +30,7 @@ void test_rdonly_maps(void)
struct bss bss;
obj = bpf_object__open_file(file, NULL);
- if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
return;
err = bpf_object__load(obj);
@@ -58,11 +58,8 @@ void test_rdonly_maps(void)
goto cleanup;
link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
- if (CHECK(IS_ERR(link), "attach_prog", "prog '%s', err %ld\n",
- t->prog_name, PTR_ERR(link))) {
- link = NULL;
+ if (!ASSERT_OK_PTR(link, "attach_prog"))
goto cleanup;
- }
/* trigger probe */
usleep(1);
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index ac1ee10cffd8..4e91f4d6466c 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -15,7 +15,7 @@ void test_reference_tracking(void)
int err = 0;
obj = bpf_object__open_file(file, &open_opts);
- if (CHECK_FAIL(IS_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
return;
if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
@@ -34,8 +34,8 @@ void test_reference_tracking(void)
if (!test__start_subtest(title))
continue;
- /* Expect verifier failure if test name has 'fail' */
- if (strstr(title, "fail") != NULL) {
+ /* Expect verifier failure if test name has 'err' */
+ if (strstr(title, "err_") != NULL) {
libbpf_print_fn_t old_print_fn;
old_print_fn = libbpf_set_print(NULL);
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index d3c2de2c24d1..f62361306f6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -76,7 +76,7 @@ __resolve_symbol(struct btf *btf, int type_id)
}
for (i = 0; i < ARRAY_SIZE(test_symbols); i++) {
- if (test_symbols[i].id != -1)
+ if (test_symbols[i].id >= 0)
continue;
if (BTF_INFO_KIND(type->info) != test_symbols[i].type)
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index f9a8ae331963..4706cee84360 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -12,7 +12,7 @@
#include <sys/sysinfo.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
-#include "test_ringbuf.skel.h"
+#include "test_ringbuf.lskel.h"
#define EDONE 7777
@@ -94,15 +94,13 @@ void test_ringbuf(void)
if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
- err = bpf_map__set_max_entries(skel->maps.ringbuf, page_size);
- if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
- goto cleanup;
+ skel->maps.ringbuf.max_entries = page_size;
err = test_ringbuf__load(skel);
if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
goto cleanup;
- rb_fd = bpf_map__fd(skel->maps.ringbuf);
+ rb_fd = skel->maps.ringbuf.map_fd;
/* good read/write cons_pos */
mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos");
@@ -151,7 +149,7 @@ void test_ringbuf(void)
/* only trigger BPF program for current process */
skel->bss->pid = getpid();
- ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf),
+ ringbuf = ring_buffer__new(skel->maps.ringbuf.map_fd,
process_sample, NULL, NULL);
if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index cef63e703924..167cd8a2edfd 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -63,7 +63,7 @@ void test_ringbuf_multi(void)
goto cleanup;
proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0);
- if (CHECK(proto_fd == -1, "bpf_create_map", "bpf_create_map failed\n"))
+ if (CHECK(proto_fd < 0, "bpf_create_map", "bpf_create_map failed\n"))
goto cleanup;
err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 821b4146b7b6..4efd337d6a3c 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -78,7 +78,7 @@ static int create_maps(enum bpf_map_type inner_type)
attr.max_entries = REUSEPORT_ARRAY_SIZE;
reuseport_array = bpf_create_map_xattr(&attr);
- RET_ERR(reuseport_array == -1, "creating reuseport_array",
+ RET_ERR(reuseport_array < 0, "creating reuseport_array",
"reuseport_array:%d errno:%d\n", reuseport_array, errno);
/* Creating outer_map */
@@ -89,7 +89,7 @@ static int create_maps(enum bpf_map_type inner_type)
attr.max_entries = 1;
attr.inner_map_fd = reuseport_array;
outer_map = bpf_create_map_xattr(&attr);
- RET_ERR(outer_map == -1, "creating outer_map",
+ RET_ERR(outer_map < 0, "creating outer_map",
"outer_map:%d errno:%d\n", outer_map, errno);
return 0;
@@ -102,8 +102,9 @@ static int prepare_bpf_obj(void)
int err;
obj = bpf_object__open("test_select_reuseport_kern.o");
- RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
- "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
+ err = libbpf_get_error(obj);
+ RET_ERR(err, "open test_select_reuseport_kern.o",
+ "obj:%p PTR_ERR(obj):%d\n", obj, err);
map = bpf_object__find_map_by_name(obj, "outer_map");
RET_ERR(!map, "find outer_map", "!map\n");
@@ -116,31 +117,31 @@ static int prepare_bpf_obj(void)
prog = bpf_program__next(NULL, obj);
RET_ERR(!prog, "get first bpf_program", "!prog\n");
select_by_skb_data_prog = bpf_program__fd(prog);
- RET_ERR(select_by_skb_data_prog == -1, "get prog fd",
+ RET_ERR(select_by_skb_data_prog < 0, "get prog fd",
"select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
map = bpf_object__find_map_by_name(obj, "result_map");
RET_ERR(!map, "find result_map", "!map\n");
result_map = bpf_map__fd(map);
- RET_ERR(result_map == -1, "get result_map fd",
+ RET_ERR(result_map < 0, "get result_map fd",
"result_map:%d\n", result_map);
map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
tmp_index_ovr_map = bpf_map__fd(map);
- RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
+ RET_ERR(tmp_index_ovr_map < 0, "get tmp_index_ovr_map fd",
"tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
map = bpf_object__find_map_by_name(obj, "linum_map");
RET_ERR(!map, "find linum_map", "!map\n");
linum_map = bpf_map__fd(map);
- RET_ERR(linum_map == -1, "get linum_map fd",
+ RET_ERR(linum_map < 0, "get linum_map fd",
"linum_map:%d\n", linum_map);
map = bpf_object__find_map_by_name(obj, "data_check_map");
RET_ERR(!map, "find data_check_map", "!map\n");
data_check_map = bpf_map__fd(map);
- RET_ERR(data_check_map == -1, "get data_check_map fd",
+ RET_ERR(data_check_map < 0, "get data_check_map fd",
"data_check_map:%d\n", data_check_map);
return 0;
@@ -237,7 +238,7 @@ static long get_linum(void)
int err;
err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
- RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
+ RET_ERR(err < 0, "lookup_elem(linum_map)", "err:%d errno:%d\n",
err, errno);
return linum;
@@ -254,11 +255,11 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
addrlen = sizeof(cli_sa);
err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
&addrlen);
- RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
+ RET_IF(err < 0, "getsockname(cli_fd)", "err:%d errno:%d\n",
err, errno);
err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
- RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
+ RET_IF(err < 0, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
err, errno);
if (type == SOCK_STREAM) {
@@ -347,7 +348,7 @@ static void check_results(void)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &results[i]);
- RET_IF(err == -1, "lookup_elem(result_map)",
+ RET_IF(err < 0, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
}
@@ -524,12 +525,12 @@ static void test_syncookie(int type, sa_family_t family)
*/
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
&tmp_index, BPF_ANY);
- RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
+ RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, 1)",
"err:%d errno:%d\n", err, errno);
do_test(type, family, &cmd, PASS);
err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
&tmp_index);
- RET_IF(err == -1 || tmp_index != -1,
+ RET_IF(err < 0 || tmp_index >= 0,
"lookup_elem(tmp_index_ovr_map)",
"err:%d errno:%d tmp_index:%d\n",
err, errno, tmp_index);
@@ -569,7 +570,7 @@ static void test_detach_bpf(int type, sa_family_t family)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
- RET_IF(err == -1, "lookup_elem(result_map)",
+ RET_IF(err < 0, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
nr_run_before += tmp;
}
@@ -584,7 +585,7 @@ static void test_detach_bpf(int type, sa_family_t family)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
- RET_IF(err == -1, "lookup_elem(result_map)",
+ RET_IF(err < 0, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
nr_run_after += tmp;
}
@@ -632,24 +633,24 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
SO_ATTACH_REUSEPORT_EBPF,
&select_by_skb_data_prog,
sizeof(select_by_skb_data_prog));
- RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
+ RET_IF(err < 0, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
"err:%d errno:%d\n", err, errno);
}
err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
- RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
+ RET_IF(err < 0, "bind()", "sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
if (type == SOCK_STREAM) {
err = listen(sk_fds[i], 10);
- RET_IF(err == -1, "listen()",
+ RET_IF(err < 0, "listen()",
"sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
}
err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
BPF_NOEXIST);
- RET_IF(err == -1, "update_elem(reuseport_array)",
+ RET_IF(err < 0, "update_elem(reuseport_array)",
"sk_fds[%d] err:%d errno:%d\n", i, err, errno);
if (i == first) {
@@ -682,7 +683,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
prepare_sk_fds(type, family, inany);
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
BPF_ANY);
- RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
+ RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, -1)",
"err:%d errno:%d\n", err, errno);
/* Install reuseport_array to outer_map? */
@@ -691,7 +692,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
BPF_ANY);
- RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)",
+ RET_IF(err < 0, "update_elem(outer_map, 0, reuseport_array)",
"err:%d errno:%d\n", err, errno);
}
@@ -720,18 +721,18 @@ static void cleanup_per_test(bool no_inner_map)
return;
err = bpf_map_delete_elem(outer_map, &index_zero);
- RET_IF(err == -1, "delete_elem(outer_map)",
+ RET_IF(err < 0, "delete_elem(outer_map)",
"err:%d errno:%d\n", err, errno);
}
static void cleanup(void)
{
- if (outer_map != -1) {
+ if (outer_map >= 0) {
close(outer_map);
outer_map = -1;
}
- if (reuseport_array != -1) {
+ if (reuseport_array >= 0) {
close(reuseport_array);
reuseport_array = -1;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 7043e6ded0e6..776916b61c40 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <sys/time.h>
+#include <sys/resource.h>
#include "test_send_signal_kern.skel.h"
-static volatile int sigusr1_received = 0;
+int sigusr1_received = 0;
static void sigusr1_handler(int signum)
{
@@ -10,29 +12,25 @@ static void sigusr1_handler(int signum)
}
static void test_send_signal_common(struct perf_event_attr *attr,
- bool signal_thread,
- const char *test_name)
+ bool signal_thread)
{
struct test_send_signal_kern *skel;
int pipe_c2p[2], pipe_p2c[2];
int err = -1, pmu_fd = -1;
- __u32 duration = 0;
char buf[256];
pid_t pid;
- if (CHECK(pipe(pipe_c2p), test_name,
- "pipe pipe_c2p error: %s\n", strerror(errno)))
+ if (!ASSERT_OK(pipe(pipe_c2p), "pipe_c2p"))
return;
- if (CHECK(pipe(pipe_p2c), test_name,
- "pipe pipe_p2c error: %s\n", strerror(errno))) {
+ if (!ASSERT_OK(pipe(pipe_p2c), "pipe_p2c")) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
return;
}
pid = fork();
- if (CHECK(pid < 0, test_name, "fork error: %s\n", strerror(errno))) {
+ if (!ASSERT_GE(pid, 0, "fork")) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
close(pipe_p2c[0]);
@@ -41,26 +39,40 @@ static void test_send_signal_common(struct perf_event_attr *attr,
}
if (pid == 0) {
+ int old_prio;
+
/* install signal handler and notify parent */
signal(SIGUSR1, sigusr1_handler);
close(pipe_c2p[0]); /* close read */
close(pipe_p2c[1]); /* close write */
+ /* boost with a high priority so we got a higher chance
+ * that if an interrupt happens, the underlying task
+ * is this process.
+ */
+ errno = 0;
+ old_prio = getpriority(PRIO_PROCESS, 0);
+ ASSERT_OK(errno, "getpriority");
+ ASSERT_OK(setpriority(PRIO_PROCESS, 0, -20), "setpriority");
+
/* notify parent signal handler is installed */
- CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
/* make sure parent enabled bpf program to send_signal */
- CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+ ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
/* wait a little for signal handler */
sleep(1);
buf[0] = sigusr1_received ? '2' : '0';
- CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
/* wait for parent notification and exit */
- CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+ ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
+
+ /* restore the old priority */
+ ASSERT_OK(setpriority(PRIO_PROCESS, 0, old_prio), "setpriority");
close(pipe_c2p[1]);
close(pipe_p2c[0]);
@@ -71,33 +83,31 @@ static void test_send_signal_common(struct perf_event_attr *attr,
close(pipe_p2c[0]); /* close read */
skel = test_send_signal_kern__open_and_load();
- if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
goto skel_open_load_failure;
if (!attr) {
err = test_send_signal_kern__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed\n")) {
+ if (!ASSERT_OK(err, "skel_attach")) {
err = -1;
goto destroy_skel;
}
} else {
pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
-1 /* group id */, 0 /* flags */);
- if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
- strerror(errno))) {
+ if (!ASSERT_GE(pmu_fd, 0, "perf_event_open")) {
err = -1;
goto destroy_skel;
}
skel->links.send_signal_perf =
bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd);
- if (CHECK(IS_ERR(skel->links.send_signal_perf), "attach_perf_event",
- "err %ld\n", PTR_ERR(skel->links.send_signal_perf)))
+ if (!ASSERT_OK_PTR(skel->links.send_signal_perf, "attach_perf_event"))
goto disable_pmu;
}
/* wait until child signal handler installed */
- CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
+ ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read");
/* trigger the bpf send_signal */
skel->bss->pid = pid;
@@ -105,21 +115,21 @@ static void test_send_signal_common(struct perf_event_attr *attr,
skel->bss->signal_thread = signal_thread;
/* notify child that bpf program can send_signal now */
- CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
/* wait for result */
err = read(pipe_c2p[0], buf, 1);
- if (CHECK(err < 0, test_name, "reading pipe error: %s\n", strerror(errno)))
+ if (!ASSERT_GE(err, 0, "reading pipe"))
goto disable_pmu;
- if (CHECK(err == 0, test_name, "reading pipe error: size 0\n")) {
+ if (!ASSERT_GT(err, 0, "reading pipe error: size 0")) {
err = -1;
goto disable_pmu;
}
- CHECK(buf[0] != '2', test_name, "incorrect result\n");
+ ASSERT_EQ(buf[0], '2', "incorrect result");
/* notify child safe to exit */
- CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
+ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
disable_pmu:
close(pmu_fd);
@@ -133,7 +143,7 @@ skel_open_load_failure:
static void test_send_signal_tracepoint(bool signal_thread)
{
- test_send_signal_common(NULL, signal_thread, "tracepoint");
+ test_send_signal_common(NULL, signal_thread);
}
static void test_send_signal_perf(bool signal_thread)
@@ -144,7 +154,7 @@ static void test_send_signal_perf(bool signal_thread)
.config = PERF_COUNT_SW_CPU_CLOCK,
};
- test_send_signal_common(&attr, signal_thread, "perf_sw_event");
+ test_send_signal_common(&attr, signal_thread);
}
static void test_send_signal_nmi(bool signal_thread)
@@ -173,7 +183,7 @@ static void test_send_signal_nmi(bool signal_thread)
close(pmu_fd);
}
- test_send_signal_common(&attr, signal_thread, "perf_hw_event");
+ test_send_signal_common(&attr, signal_thread);
}
void test_send_signal(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 45c82db3c58c..aee41547e7f4 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -480,7 +480,7 @@ static struct bpf_link *attach_lookup_prog(struct bpf_program *prog)
}
link = bpf_program__attach_netns(prog, net_fd);
- if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) {
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_netns")) {
errno = -PTR_ERR(link);
log_err("failed to attach program '%s' to netns",
bpf_program__name(prog));
diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
index fe87b77af459..f6f130c99b8c 100644
--- a/tools/testing/selftests/bpf/prog_tests/skeleton.c
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -82,10 +82,8 @@ void test_skeleton(void)
CHECK(data->out2 != 2, "res2", "got %lld != exp %d\n", data->out2, 2);
CHECK(bss->out3 != 3, "res3", "got %d != exp %d\n", (int)bss->out3, 3);
CHECK(bss->out4 != 4, "res4", "got %lld != exp %d\n", bss->out4, 4);
- CHECK(bss->handler_out5.a != 5, "res5", "got %d != exp %d\n",
- bss->handler_out5.a, 5);
- CHECK(bss->handler_out5.b != 6, "res6", "got %lld != exp %d\n",
- bss->handler_out5.b, 6);
+ CHECK(bss->out5.a != 5, "res5", "got %d != exp %d\n", bss->out5.a, 5);
+ CHECK(bss->out5.b != 6, "res6", "got %lld != exp %d\n", bss->out5.b, 6);
CHECK(bss->out6 != 14, "res7", "got %d != exp %d\n", bss->out6, 14);
CHECK(bss->bpf_syscall != kcfg->CONFIG_BPF_SYSCALL, "ext1",
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
index dffbcaa1ec98..8fd1b4b29a0e 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -19,7 +19,7 @@
#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
-#define EXP_STR_OUT "str1 longstr"
+#define EXP_STR_OUT "str1 a b c d e longstr"
#define EXP_STR_RET sizeof(EXP_STR_OUT)
#define EXP_OVER_OUT "%over"
@@ -114,6 +114,8 @@ void test_snprintf_negative(void)
ASSERT_ERR(load_single_snprintf("%"), "invalid specifier 3");
ASSERT_ERR(load_single_snprintf("%12345678"), "invalid specifier 4");
ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
+ ASSERT_ERR(load_single_snprintf("%lc"), "invalid specifier 6");
+ ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7");
ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
index af87118e748e..577d619fb07e 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -97,12 +97,12 @@ static void check_result(void)
err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
&egress_linum);
- CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+ CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
&ingress_linum);
- CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+ CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
"err:%d errno:%d\n", err, errno);
memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
@@ -355,14 +355,12 @@ void test_sock_fields(void)
egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
child_cg_fd);
- if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n",
- PTR_ERR(egress_link)))
+ if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)"))
goto done;
ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
child_cg_fd);
- if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n",
- PTR_ERR(ingress_link)))
+ if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)"))
goto done;
linum_map_fd = bpf_map__fd(skel->maps.linum_map);
@@ -375,8 +373,8 @@ done:
bpf_link__destroy(egress_link);
bpf_link__destroy(ingress_link);
test_sock_fields__destroy(skel);
- if (child_cg_fd != -1)
+ if (child_cg_fd >= 0)
close(child_cg_fd);
- if (parent_cg_fd != -1)
+ if (parent_cg_fd >= 0)
close(parent_cg_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index ab77596b64e3..1352ec104149 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -88,11 +88,11 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type)
int s, map, err;
s = connected_socket_v4();
- if (CHECK_FAIL(s == -1))
+ if (CHECK_FAIL(s < 0))
return;
map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
- if (CHECK_FAIL(map == -1)) {
+ if (CHECK_FAIL(map < 0)) {
perror("bpf_create_map");
goto out;
}
@@ -245,7 +245,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.copy, &opts);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -304,7 +304,7 @@ static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
}
err = bpf_prog_attach(verdict, map, second, 0);
- assert(err == -1 && errno == EBUSY);
+ ASSERT_EQ(err, -EBUSY, "prog_attach_fail");
err = bpf_prog_detach2(verdict, map, first);
if (CHECK_FAIL(err)) {
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 06b86addc181..7a0d64fdc192 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -98,7 +98,7 @@ static void run_tests(int family, enum bpf_map_type map_type)
int map;
map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
- if (CHECK_FAIL(map == -1)) {
+ if (CHECK_FAIL(map < 0)) {
perror("bpf_map_create");
return;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 648d9ae898d2..d88bb65b74cc 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -139,7 +139,7 @@
#define xbpf_map_delete_elem(fd, key) \
({ \
int __ret = bpf_map_delete_elem((fd), (key)); \
- if (__ret == -1) \
+ if (__ret < 0) \
FAIL_ERRNO("map_delete"); \
__ret; \
})
@@ -147,7 +147,7 @@
#define xbpf_map_lookup_elem(fd, key, val) \
({ \
int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
- if (__ret == -1) \
+ if (__ret < 0) \
FAIL_ERRNO("map_lookup"); \
__ret; \
})
@@ -155,7 +155,7 @@
#define xbpf_map_update_elem(fd, key, val, flags) \
({ \
int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
- if (__ret == -1) \
+ if (__ret < 0) \
FAIL_ERRNO("map_update"); \
__ret; \
})
@@ -164,7 +164,7 @@
({ \
int __ret = \
bpf_prog_attach((prog), (target), (type), (flags)); \
- if (__ret == -1) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_attach(" #type ")"); \
__ret; \
})
@@ -172,7 +172,7 @@
#define xbpf_prog_detach2(prog, target, type) \
({ \
int __ret = bpf_prog_detach2((prog), (target), (type)); \
- if (__ret == -1) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_detach2(" #type ")"); \
__ret; \
})
@@ -351,9 +351,11 @@ static void test_insert_opened(int family, int sotype, int mapfd)
errno = 0;
value = s;
err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
- if (!err || errno != EOPNOTSUPP)
- FAIL_ERRNO("map_update: expected EOPNOTSUPP");
-
+ if (sotype == SOCK_STREAM) {
+ if (!err || errno != EOPNOTSUPP)
+ FAIL_ERRNO("map_update: expected EOPNOTSUPP");
+ } else if (err)
+ FAIL_ERRNO("map_update: expected success");
xclose(s);
}
@@ -919,6 +921,23 @@ static const char *redir_mode_str(enum redir_mode mode)
}
}
+static int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
+{
+ u64 value;
+ u32 key;
+ int err;
+
+ key = 0;
+ value = fd1;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ return err;
+
+ key = 1;
+ value = fd2;
+ return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+}
+
static void redir_to_connected(int family, int sotype, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
@@ -928,7 +947,6 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
unsigned int pass;
socklen_t len;
int err, n;
- u64 value;
u32 key;
char b;
@@ -965,15 +983,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (p1 < 0)
goto close_cli1;
- key = 0;
- value = p0;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- goto close_peer1;
-
- key = 1;
- value = p1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
goto close_peer1;
@@ -991,12 +1001,11 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
goto close_peer1;
if (pass != 1)
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
- n = read(c0, &b, 1);
+ n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
if (n < 0)
- FAIL_ERRNO("%s: read", log_prefix);
+ FAIL_ERRNO("%s: recv_timeout", log_prefix);
if (n == 0)
- FAIL("%s: incomplete read", log_prefix);
+ FAIL("%s: incomplete recv", log_prefix);
close_peer1:
xclose(p1);
@@ -1061,7 +1070,6 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
int s, c, p, err, n;
unsigned int drop;
socklen_t len;
- u64 value;
u32 key;
zero_verdict_count(verd_mapfd);
@@ -1086,15 +1094,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
if (p < 0)
goto close_cli;
- key = 0;
- value = s;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- goto close_peer;
-
- key = 1;
- value = p;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_mapfd, s, p);
if (err)
goto close_peer;
@@ -1346,7 +1346,6 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
int s1, s2, c, err;
unsigned int drop;
socklen_t len;
- u64 value;
u32 key;
zero_verdict_count(verd_map);
@@ -1360,16 +1359,10 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
if (s2 < 0)
goto close_srv1;
- key = 0;
- value = s1;
- err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_map, s1, s2);
if (err)
goto close_srv2;
- key = 1;
- value = s2;
- err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
-
/* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
len = sizeof(addr);
err = xgetsockname(s2, sockaddr(&addr), &len);
@@ -1441,6 +1434,8 @@ static const char *family_str(sa_family_t family)
return "IPv4";
case AF_INET6:
return "IPv6";
+ case AF_UNIX:
+ return "Unix";
default:
return "unknown";
}
@@ -1563,6 +1558,94 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
}
}
+static void unix_redir_to_connected(int sotype, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int err, n;
+ int sfd[2];
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+ return;
+ c0 = sfd[0], p0 = sfd[1];
+
+ if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+ goto close0;
+ c1 = sfd[0], p1 = sfd[1];
+
+ err = add_to_sockmap(sock_mapfd, p0, p1);
+ if (err)
+ goto close;
+
+ n = write(c1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close;
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto close;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+ n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
+ if (n < 0)
+ FAIL_ERRNO("%s: recv_timeout", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete recv", log_prefix);
+
+close:
+ xclose(c1);
+ xclose(p1);
+close0:
+ xclose(c0);
+ xclose(p0);
+}
+
+static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int sotype)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(AF_UNIX);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+ unix_skb_redir_to_connected(skel, map, sotype);
+}
+
static void test_reuseport(struct test_sockmap_listen *skel,
struct bpf_map *map, int family, int sotype)
{
@@ -1603,32 +1686,27 @@ static void test_reuseport(struct test_sockmap_listen *skel,
}
}
-static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
- int verd_mapfd, enum redir_mode mode)
+static int inet_socketpair(int family, int type, int *s, int *c)
{
- const char *log_prefix = redir_mode_str(mode);
struct sockaddr_storage addr;
- int c0, c1, p0, p1;
- unsigned int pass;
socklen_t len;
- int err, n;
- u64 value;
- u32 key;
- char b;
-
- zero_verdict_count(verd_mapfd);
+ int p0, c0;
+ int err;
- p0 = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ p0 = socket_loopback(family, type | SOCK_NONBLOCK);
if (p0 < 0)
- return;
+ return p0;
+
len = sizeof(addr);
err = xgetsockname(p0, sockaddr(&addr), &len);
if (err)
goto close_peer0;
- c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
- if (c0 < 0)
+ c0 = xsocket(family, type | SOCK_NONBLOCK, 0);
+ if (c0 < 0) {
+ err = c0;
goto close_peer0;
+ }
err = xconnect(c0, sockaddr(&addr), len);
if (err)
goto close_cli0;
@@ -1639,35 +1717,125 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
if (err)
goto close_cli0;
- p1 = socket_loopback(family, sotype | SOCK_NONBLOCK);
- if (p1 < 0)
- goto close_cli0;
- err = xgetsockname(p1, sockaddr(&addr), &len);
+ *s = p0;
+ *c = c0;
+ return 0;
+
+close_cli0:
+ xclose(c0);
+close_peer0:
+ xclose(p0);
+ return err;
+}
+
+static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
+ enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int err, n;
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
+ if (err)
+ return;
+ err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
if (err)
goto close_cli0;
- c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
- if (c1 < 0)
- goto close_peer1;
- err = xconnect(c1, sockaddr(&addr), len);
+ err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
goto close_cli1;
- err = xgetsockname(c1, sockaddr(&addr), &len);
- if (err)
+
+ n = write(c1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
goto close_cli1;
- err = xconnect(p1, sockaddr(&addr), len);
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
if (err)
goto close_cli1;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
- key = 0;
- value = p0;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
+ if (n < 0)
+ FAIL_ERRNO("%s: recv_timeout", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete recv", log_prefix);
+
+close_cli1:
+ xclose(c1);
+ xclose(p1);
+close_cli0:
+ xclose(c0);
+ xclose(p0);
+}
+
+static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
if (err)
- goto close_cli1;
+ return;
- key = 1;
- value = p1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ skel->bss->test_ingress = false;
+ udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(family);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+ udp_skb_redir_to_connected(skel, map, family);
+}
+
+static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int err, n;
+ int sfd[2];
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+ return;
+ c0 = sfd[0], p0 = sfd[1];
+
+ err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
+ if (err)
+ goto close;
+
+ err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
goto close_cli1;
@@ -1686,24 +1854,103 @@ static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
if (pass != 1)
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
- n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+ n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
if (n < 0)
- FAIL_ERRNO("%s: read", log_prefix);
+ FAIL_ERRNO("%s: recv_timeout", log_prefix);
if (n == 0)
- FAIL("%s: incomplete read", log_prefix);
+ FAIL("%s: incomplete recv", log_prefix);
close_cli1:
xclose(c1);
-close_peer1:
+ xclose(p1);
+close:
+ xclose(c0);
+ xclose(p0);
+}
+
+static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+ inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ int err, n;
+ int sfd[2];
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
+ if (err)
+ return;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+ goto close_cli0;
+ c1 = sfd[0], p1 = sfd[1];
+
+ err = add_to_sockmap(sock_mapfd, p0, p1);
+ if (err)
+ goto close;
+
+ n = write(c1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close;
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto close;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+ n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC);
+ if (n < 0)
+ FAIL_ERRNO("%s: recv_timeout", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete recv", log_prefix);
+
+close:
+ xclose(c1);
xclose(p1);
close_cli0:
xclose(c0);
-close_peer0:
xclose(p0);
+
}
-static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
+static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
{
int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
@@ -1715,17 +1962,21 @@ static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
return;
skel->bss->test_ingress = false;
- udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_EGRESS);
+ unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_EGRESS);
skel->bss->test_ingress = true;
- udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_INGRESS);
+ unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+ unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_INGRESS);
xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
}
-static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int family)
+static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family)
{
const char *family_name, *map_name;
char s[MAX_TEST_NAME];
@@ -1735,7 +1986,8 @@ static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map
snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
if (!test__start_subtest(s))
return;
- udp_skb_redir_to_connected(skel, map, family);
+ inet_unix_skb_redir_to_connected(skel, map, family);
+ unix_inet_skb_redir_to_connected(skel, map, family);
}
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
@@ -1747,6 +1999,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
test_reuseport(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_DGRAM);
test_udp_redir(skel, map, family);
+ test_udp_unix_redir(skel, map, family);
}
void test_sockmap_listen(void)
@@ -1762,10 +2015,14 @@ void test_sockmap_listen(void)
skel->bss->test_sockmap = true;
run_tests(skel, skel->maps.sock_map, AF_INET);
run_tests(skel, skel->maps.sock_map, AF_INET6);
+ test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
+ test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
skel->bss->test_sockmap = false;
run_tests(skel, skel->maps.sock_hash, AF_INET);
run_tests(skel, skel->maps.sock_hash, AF_INET6);
+ test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
+ test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
test_sockmap_listen__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index ec281b0363b8..86f97681ad89 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -195,8 +195,10 @@ static void run_test(int cgroup_fd)
pthread_mutex_lock(&server_started_mtx);
if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
- (void *)&server_fd)))
+ (void *)&server_fd))) {
+ pthread_mutex_unlock(&server_started_mtx);
goto close_server_fd;
+ }
pthread_cond_wait(&server_started, &server_started_mtx);
pthread_mutex_unlock(&server_started_mtx);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
new file mode 100644
index 000000000000..6b53b3cb8dad
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <netinet/tcp.h>
+#include "sockopt_qos_to_cc.skel.h"
+
+static void run_setsockopt_test(int cg_fd, int sock_fd)
+{
+ socklen_t optlen;
+ char cc[16]; /* TCP_CA_NAME_MAX */
+ int buf;
+ int err = -1;
+
+ buf = 0x2D;
+ err = setsockopt(sock_fd, SOL_IPV6, IPV6_TCLASS, &buf, sizeof(buf));
+ if (!ASSERT_OK(err, "setsockopt(sock_fd, IPV6_TCLASS)"))
+ return;
+
+ /* Verify the setsockopt cc change */
+ optlen = sizeof(cc);
+ err = getsockopt(sock_fd, SOL_TCP, TCP_CONGESTION, cc, &optlen);
+ if (!ASSERT_OK(err, "getsockopt(sock_fd, TCP_CONGESTION)"))
+ return;
+
+ if (!ASSERT_STREQ(cc, "reno", "getsockopt(sock_fd, TCP_CONGESTION)"))
+ return;
+}
+
+void test_sockopt_qos_to_cc(void)
+{
+ struct sockopt_qos_to_cc *skel;
+ char cc_cubic[16] = "cubic"; /* TCP_CA_NAME_MAX */
+ int cg_fd = -1;
+ int sock_fd = -1;
+ int err;
+
+ cg_fd = test__join_cgroup("/sockopt_qos_to_cc");
+ if (!ASSERT_GE(cg_fd, 0, "cg-join(sockopt_qos_to_cc)"))
+ return;
+
+ skel = sockopt_qos_to_cc__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto done;
+
+ sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "v6 socket open"))
+ goto done;
+
+ err = setsockopt(sock_fd, SOL_TCP, TCP_CONGESTION, &cc_cubic,
+ sizeof(cc_cubic));
+ if (!ASSERT_OK(err, "setsockopt(sock_fd, TCP_CONGESTION)"))
+ goto done;
+
+ skel->links.sockopt_qos_to_cc =
+ bpf_program__attach_cgroup(skel->progs.sockopt_qos_to_cc,
+ cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.sockopt_qos_to_cc,
+ "prog_attach(sockopt_qos_to_cc)"))
+ goto done;
+
+ run_setsockopt_test(cg_fd, sock_fd);
+
+done:
+ if (sock_fd != -1)
+ close(sock_fd);
+ if (cg_fd != -1)
+ close(cg_fd);
+ /* destroy can take null and error pointer */
+ sockopt_qos_to_cc__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 11a769e18f5d..0a91d8d9954b 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -62,8 +62,7 @@ retry:
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
- "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+ if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
close(pmu_fd);
goto cleanup;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index 37269d23df93..04b476bd62b9 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -21,7 +21,7 @@ void test_stacktrace_map(void)
goto close_prog;
link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
- if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_tp"))
goto close_prog;
/* find map fds */
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index 404a5498e1a3..4fd30bb651ad 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -21,7 +21,7 @@ void test_stacktrace_map_raw_tp(void)
goto close_prog;
link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
/* find map fds */
@@ -59,7 +59,6 @@ void test_stacktrace_map_raw_tp(void)
goto close_prog;
close_prog:
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
+ bpf_link__destroy(link);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/static_linked.c b/tools/testing/selftests/bpf/prog_tests/static_linked.c
index 46556976dccc..5c4e3014e063 100644
--- a/tools/testing/selftests/bpf/prog_tests/static_linked.c
+++ b/tools/testing/selftests/bpf/prog_tests/static_linked.c
@@ -14,12 +14,7 @@ void test_static_linked(void)
return;
skel->rodata->rovar1 = 1;
- skel->bss->static_var1 = 2;
- skel->bss->static_var11 = 3;
-
skel->rodata->rovar2 = 4;
- skel->bss->static_var2 = 5;
- skel->bss->static_var22 = 6;
err = test_static_linked__load(skel);
if (!ASSERT_OK(err, "skel_load"))
@@ -32,8 +27,8 @@ void test_static_linked(void)
/* trigger */
usleep(1);
- ASSERT_EQ(skel->bss->var1, 1 * 2 + 2 + 3, "var1");
- ASSERT_EQ(skel->bss->var2, 4 * 3 + 5 + 6, "var2");
+ ASSERT_EQ(skel->data->var1, 1 * 2 + 2 + 3, "var1");
+ ASSERT_EQ(skel->data->var2, 4 * 3 + 5 + 6, "var2");
cleanup:
test_static_linked__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c
new file mode 100644
index 000000000000..81e997a69f7a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/syscall.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "syscall.skel.h"
+
+struct args {
+ __u64 log_buf;
+ __u32 log_size;
+ int max_entries;
+ int map_fd;
+ int prog_fd;
+ int btf_fd;
+};
+
+void test_syscall(void)
+{
+ static char verifier_log[8192];
+ struct args ctx = {
+ .max_entries = 1024,
+ .log_buf = (uintptr_t) verifier_log,
+ .log_size = sizeof(verifier_log),
+ };
+ struct bpf_prog_test_run_attr tattr = {
+ .ctx_in = &ctx,
+ .ctx_size_in = sizeof(ctx),
+ };
+ struct syscall *skel = NULL;
+ __u64 key = 12, value = 0;
+ int err;
+
+ skel = syscall__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ tattr.prog_fd = bpf_program__fd(skel->progs.bpf_prog);
+ err = bpf_prog_test_run_xattr(&tattr);
+ ASSERT_EQ(err, 0, "err");
+ ASSERT_EQ(tattr.retval, 1, "retval");
+ ASSERT_GT(ctx.map_fd, 0, "ctx.map_fd");
+ ASSERT_GT(ctx.prog_fd, 0, "ctx.prog_fd");
+ ASSERT_OK(memcmp(verifier_log, "processed", sizeof("processed") - 1),
+ "verifier_log");
+
+ err = bpf_map_lookup_elem(ctx.map_fd, &key, &value);
+ ASSERT_EQ(err, 0, "map_lookup");
+ ASSERT_EQ(value, 34, "map lookup value");
+cleanup:
+ syscall__destroy(skel);
+ if (ctx.prog_fd > 0)
+ close(ctx.prog_fd);
+ if (ctx.map_fd > 0)
+ close(ctx.map_fd);
+ if (ctx.btf_fd > 0)
+ close(ctx.btf_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index ee27d68d2a1c..b5940e6ca67c 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -715,6 +715,8 @@ out:
bpf_object__close(obj);
}
+#include "tailcall_bpf2bpf4.skel.h"
+
/* test_tailcall_bpf2bpf_4 checks that tailcall counter is correctly preserved
* across tailcalls combined with bpf2bpf calls. for making sure that tailcall
* counter behaves correctly, bpf program will go through following flow:
@@ -727,10 +729,15 @@ out:
* the loop begins. At the end of the test make sure that the global counter is
* equal to 31, because tailcall counter includes the first two tailcalls
* whereas global counter is incremented only on loop presented on flow above.
+ *
+ * The noise parameter is used to insert bpf_map_update calls into the logic
+ * to force verifier to patch instructions. This allows us to ensure jump
+ * logic remains correct with instruction movement.
*/
-static void test_tailcall_bpf2bpf_4(void)
+static void test_tailcall_bpf2bpf_4(bool noise)
{
- int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+ int err, map_fd, prog_fd, main_fd, data_fd, i;
+ struct tailcall_bpf2bpf4__bss val;
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
struct bpf_object *obj;
@@ -774,11 +781,6 @@ static void test_tailcall_bpf2bpf_4(void)
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
-
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
return;
@@ -788,9 +790,21 @@ static void test_tailcall_bpf2bpf_4(void)
return;
i = 0;
+ val.noise = noise;
+ val.count = 0;
+ err = bpf_map_update_elem(data_fd, &i, &val, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+ &duration, &retval, NULL);
+ CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n",
+ err, errno, retval);
+
+ i = 0;
err = bpf_map_lookup_elem(data_fd, &i, &val);
- CHECK(err || val != 31, "tailcall count", "err %d errno %d count %d\n",
- err, errno, val);
+ CHECK(err || val.count != 31, "tailcall count", "err %d errno %d count %d\n",
+ err, errno, val.count);
out:
bpf_object__close(obj);
@@ -815,5 +829,7 @@ void test_tailcalls(void)
if (test__start_subtest("tailcall_bpf2bpf_3"))
test_tailcall_bpf2bpf_3();
if (test__start_subtest("tailcall_bpf2bpf_4"))
- test_tailcall_bpf2bpf_4();
+ test_tailcall_bpf2bpf_4(false);
+ if (test__start_subtest("tailcall_bpf2bpf_5"))
+ test_tailcall_bpf2bpf_4(true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
new file mode 100644
index 000000000000..37c20b5ffa70
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include "test_task_pt_regs.skel.h"
+
+void test_task_pt_regs(void)
+{
+ struct test_task_pt_regs *skel;
+ struct bpf_link *uprobe_link;
+ size_t uprobe_offset;
+ ssize_t base_addr;
+ bool match;
+
+ base_addr = get_base_addr();
+ if (!ASSERT_GT(base_addr, 0, "get_base_addr"))
+ return;
+ uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+
+ skel = test_task_pt_regs__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+ if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
+ goto cleanup;
+
+ uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
+ false /* retprobe */,
+ 0 /* self pid */,
+ "/proc/self/exe",
+ uprobe_offset);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
+ goto cleanup;
+ skel->links.handle_uprobe = uprobe_link;
+
+ /* trigger & validate uprobe */
+ get_base_addr();
+
+ if (!ASSERT_EQ(skel->bss->uprobe_res, 1, "check_uprobe_res"))
+ goto cleanup;
+
+ match = !memcmp(&skel->bss->current_regs, &skel->bss->ctx_regs,
+ sizeof(skel->bss->current_regs));
+ ASSERT_TRUE(match, "check_regs_match");
+
+cleanup:
+ test_task_pt_regs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
new file mode 100644
index 000000000000..4a505a5adf4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <linux/pkt_cls.h>
+
+#include "test_tc_bpf.skel.h"
+
+#define LO_IFINDEX 1
+
+#define TEST_DECLARE_OPTS(__fd) \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_h, .handle = 1); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_p, .priority = 1); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_f, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hp, .handle = 1, .priority = 1); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hf, .handle = 1, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_pf, .priority = 1, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpf, .handle = 1, .priority = 1, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpi, .handle = 1, .priority = 1, .prog_id = 42); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpr, .handle = 1, .priority = 1, \
+ .flags = BPF_TC_F_REPLACE); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpfi, .handle = 1, .priority = 1, .prog_fd = __fd, \
+ .prog_id = 42); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_prio_max, .handle = 1, .priority = UINT16_MAX + 1);
+
+static int test_tc_bpf_basic(const struct bpf_tc_hook *hook, int fd)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, .prog_fd = fd);
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int ret;
+
+ ret = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd"))
+ return ret;
+
+ ret = bpf_tc_attach(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ return ret;
+
+ if (!ASSERT_EQ(opts.handle, 1, "handle set") ||
+ !ASSERT_EQ(opts.priority, 1, "priority set") ||
+ !ASSERT_EQ(opts.prog_id, info.id, "prog_id set"))
+ goto end;
+
+ opts.prog_id = 0;
+ opts.flags = BPF_TC_F_REPLACE;
+ ret = bpf_tc_attach(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach replace mode"))
+ goto end;
+
+ opts.flags = opts.prog_fd = opts.prog_id = 0;
+ ret = bpf_tc_query(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_query"))
+ goto end;
+
+ if (!ASSERT_EQ(opts.handle, 1, "handle set") ||
+ !ASSERT_EQ(opts.priority, 1, "priority set") ||
+ !ASSERT_EQ(opts.prog_id, info.id, "prog_id set"))
+ goto end;
+
+end:
+ opts.flags = opts.prog_fd = opts.prog_id = 0;
+ ret = bpf_tc_detach(hook, &opts);
+ ASSERT_OK(ret, "bpf_tc_detach");
+ return ret;
+}
+
+static int test_tc_bpf_api(struct bpf_tc_hook *hook, int fd)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, attach_opts, .handle = 1, .priority = 1, .prog_fd = fd);
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, inv_hook, .attach_point = BPF_TC_INGRESS);
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1);
+ int ret;
+
+ ret = bpf_tc_hook_create(NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook = NULL"))
+ return -EINVAL;
+
+ /* hook ifindex = 0 */
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex == 0"))
+ return -EINVAL;
+ attach_opts.prog_id = 0;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ /* hook ifindex < 0 */
+ inv_hook.ifindex = -1;
+
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex < 0"))
+ return -EINVAL;
+ attach_opts.prog_id = 0;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ inv_hook.ifindex = LO_IFINDEX;
+
+ /* hook.attach_point invalid */
+ inv_hook.attach_point = 0xabcd;
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook.attach_point"))
+ return -EINVAL;
+
+ inv_hook.attach_point = BPF_TC_INGRESS;
+
+ /* hook.attach_point valid, but parent invalid */
+ inv_hook.parent = TC_H_MAKE(1UL << 16, 10);
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent"))
+ return -EINVAL;
+
+ inv_hook.attach_point = BPF_TC_CUSTOM;
+ inv_hook.parent = 0;
+ /* These return EOPNOTSUPP instead of EINVAL as parent is checked after
+ * attach_point of the hook.
+ */
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent"))
+ return -EINVAL;
+
+ inv_hook.attach_point = BPF_TC_INGRESS;
+
+ /* detach */
+ {
+ TEST_DECLARE_OPTS(fd);
+
+ ret = bpf_tc_detach(NULL, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid opts = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_hpr);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid flags set"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_hpf);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_fd set"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_hpi);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_id set"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_p);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid handle unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_h);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_prio_max);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority > UINT16_MAX"))
+ return -EINVAL;
+ }
+
+ /* query */
+ {
+ TEST_DECLARE_OPTS(fd);
+
+ ret = bpf_tc_query(NULL, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid opts = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_hpr);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid flags set"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_hpf);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_fd set"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_hpi);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_id set"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_p);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid handle unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_h);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_prio_max);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority > UINT16_MAX"))
+ return -EINVAL;
+
+ /* when chain is not present, kernel returns -EINVAL */
+ ret = bpf_tc_query(hook, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query valid handle, priority set"))
+ return -EINVAL;
+ }
+
+ /* attach */
+ {
+ TEST_DECLARE_OPTS(fd);
+
+ ret = bpf_tc_attach(NULL, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid opts = NULL"))
+ return -EINVAL;
+
+ opts_hp.flags = 42;
+ ret = bpf_tc_attach(hook, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid flags"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_fd unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, &opts_hpi);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_id set"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, &opts_pf);
+ if (!ASSERT_OK(ret, "bpf_tc_attach valid handle unset"))
+ return -EINVAL;
+ opts_pf.prog_fd = opts_pf.prog_id = 0;
+ ASSERT_OK(bpf_tc_detach(hook, &opts_pf), "bpf_tc_detach");
+
+ ret = bpf_tc_attach(hook, &opts_hf);
+ if (!ASSERT_OK(ret, "bpf_tc_attach valid priority unset"))
+ return -EINVAL;
+ opts_hf.prog_fd = opts_hf.prog_id = 0;
+ ASSERT_OK(bpf_tc_detach(hook, &opts_hf), "bpf_tc_detach");
+
+ ret = bpf_tc_attach(hook, &opts_prio_max);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid priority > UINT16_MAX"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, &opts_f);
+ if (!ASSERT_OK(ret, "bpf_tc_attach valid both handle and priority unset"))
+ return -EINVAL;
+ opts_f.prog_fd = opts_f.prog_id = 0;
+ ASSERT_OK(bpf_tc_detach(hook, &opts_f), "bpf_tc_detach");
+ }
+
+ return 0;
+}
+
+void test_tc_bpf(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_INGRESS);
+ struct test_tc_bpf *skel = NULL;
+ bool hook_created = false;
+ int cls_fd, ret;
+
+ skel = test_tc_bpf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load"))
+ return;
+
+ cls_fd = bpf_program__fd(skel->progs.cls);
+
+ ret = bpf_tc_hook_create(&hook);
+ if (ret == 0)
+ hook_created = true;
+
+ ret = ret == -EEXIST ? 0 : ret;
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create(BPF_TC_INGRESS)"))
+ goto end;
+
+ hook.attach_point = BPF_TC_CUSTOM;
+ hook.parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+ ret = bpf_tc_hook_create(&hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook.attach_point"))
+ goto end;
+
+ ret = test_tc_bpf_basic(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_internal ingress"))
+ goto end;
+
+ ret = bpf_tc_hook_destroy(&hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook.attach_point"))
+ goto end;
+
+ hook.attach_point = BPF_TC_INGRESS;
+ hook.parent = 0;
+ bpf_tc_hook_destroy(&hook);
+
+ ret = test_tc_bpf_basic(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_internal ingress"))
+ goto end;
+
+ bpf_tc_hook_destroy(&hook);
+
+ hook.attach_point = BPF_TC_EGRESS;
+ ret = test_tc_bpf_basic(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_internal egress"))
+ goto end;
+
+ bpf_tc_hook_destroy(&hook);
+
+ ret = test_tc_bpf_api(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_bpf_api"))
+ goto end;
+
+ bpf_tc_hook_destroy(&hook);
+
+end:
+ if (hook_created) {
+ hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&hook);
+ }
+ test_tc_bpf__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 5703c918812b..e7201ba29ccd 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -13,15 +13,16 @@
#define _GNU_SOURCE
#include <arpa/inet.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
#include <linux/limits.h>
#include <linux/sysctl.h>
-#include <linux/if_tun.h>
-#include <linux/if.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
-#include <sys/stat.h>
#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
#include "test_progs.h"
#include "network_helpers.h"
@@ -391,9 +392,7 @@ done:
static int test_ping(int family, const char *addr)
{
- const char *ping = family == AF_INET6 ? "ping6" : "ping";
-
- SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr);
+ SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
return 0;
fail:
return -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
index 08d19cafd5e8..1fa772079967 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -353,8 +353,7 @@ static void fastopen_estab(void)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, true)) {
@@ -398,8 +397,7 @@ static void syncookie_estab(void)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
@@ -431,8 +429,7 @@ static void fin(void)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
@@ -471,8 +468,7 @@ static void __simple_estab(bool exprm)
return;
link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
@@ -509,8 +505,7 @@ static void misc(void)
return;
link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd);
- if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(misc_estab)"))
return;
if (sk_fds_connect(&sk_fds, false)) {
diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
index 9966685866fd..123c68c1917d 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -73,7 +73,7 @@ void test_test_overhead(void)
return;
obj = bpf_object__open_file("./test_overhead.o", NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
return;
kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
@@ -108,7 +108,7 @@ void test_test_overhead(void)
/* attach kprobe */
link = bpf_program__attach_kprobe(kprobe_prog, false /* retprobe */,
kprobe_func);
- if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_kprobe"))
goto cleanup;
test_run("kprobe");
bpf_link__destroy(link);
@@ -116,28 +116,28 @@ void test_test_overhead(void)
/* attach kretprobe */
link = bpf_program__attach_kprobe(kretprobe_prog, true /* retprobe */,
kprobe_func);
- if (CHECK(IS_ERR(link), "attach kretprobe", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_kretprobe"))
goto cleanup;
test_run("kretprobe");
bpf_link__destroy(link);
/* attach raw_tp */
link = bpf_program__attach_raw_tracepoint(raw_tp_prog, "task_rename");
- if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto cleanup;
test_run("raw_tp");
bpf_link__destroy(link);
/* attach fentry */
link = bpf_program__attach_trace(fentry_prog);
- if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_fentry"))
goto cleanup;
test_run("fentry");
bpf_link__destroy(link);
/* attach fexit */
link = bpf_program__attach_trace(fexit_prog);
- if (CHECK(IS_ERR(link), "attach fexit", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_fexit"))
goto cleanup;
test_run("fexit");
bpf_link__destroy(link);
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
new file mode 100644
index 000000000000..25f40e1b9967
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "timer.skel.h"
+
+static int timer(struct timer *timer_skel)
+{
+ int err, prog_fd;
+ __u32 duration = 0, retval;
+
+ err = timer__attach(timer_skel);
+ if (!ASSERT_OK(err, "timer_attach"))
+ return err;
+
+ ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1");
+ ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1");
+
+ prog_fd = bpf_program__fd(timer_skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+ NULL, NULL, &retval, &duration);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(retval, 0, "test_run");
+ timer__detach(timer_skel);
+
+ usleep(50); /* 10 usecs should be enough, but give it extra */
+ /* check that timer_cb1() was executed 10+10 times */
+ ASSERT_EQ(timer_skel->data->callback_check, 42, "callback_check2");
+ ASSERT_EQ(timer_skel->data->callback2_check, 42, "callback2_check2");
+
+ /* check that timer_cb2() was executed twice */
+ ASSERT_EQ(timer_skel->bss->bss_data, 10, "bss_data");
+
+ /* check that there were no errors in timer execution */
+ ASSERT_EQ(timer_skel->bss->err, 0, "err");
+
+ /* check that code paths completed */
+ ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok");
+
+ return 0;
+}
+
+void test_timer(void)
+{
+ struct timer *timer_skel = NULL;
+ int err;
+
+ timer_skel = timer__open_and_load();
+ if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
+ goto cleanup;
+
+ err = timer(timer_skel);
+ ASSERT_OK(err, "timer");
+cleanup:
+ timer__destroy(timer_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
new file mode 100644
index 000000000000..ced8f6cf347c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "timer_mim.skel.h"
+#include "timer_mim_reject.skel.h"
+
+static int timer_mim(struct timer_mim *timer_skel)
+{
+ __u32 duration = 0, retval;
+ __u64 cnt1, cnt2;
+ int err, prog_fd, key1 = 1;
+
+ err = timer_mim__attach(timer_skel);
+ if (!ASSERT_OK(err, "timer_attach"))
+ return err;
+
+ prog_fd = bpf_program__fd(timer_skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
+ NULL, NULL, &retval, &duration);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(retval, 0, "test_run");
+ timer_mim__detach(timer_skel);
+
+ /* check that timer_cb[12] are incrementing 'cnt' */
+ cnt1 = READ_ONCE(timer_skel->bss->cnt);
+ for (int i = 0; i < 100; i++) {
+ cnt2 = READ_ONCE(timer_skel->bss->cnt);
+ if (cnt2 != cnt1)
+ break;
+ usleep(200); /* 100 times more than interval */
+ }
+ ASSERT_GT(cnt2, cnt1, "cnt");
+
+ ASSERT_EQ(timer_skel->bss->err, 0, "err");
+ /* check that code paths completed */
+ ASSERT_EQ(timer_skel->bss->ok, 1 | 2, "ok");
+
+ close(bpf_map__fd(timer_skel->maps.inner_htab));
+ err = bpf_map_delete_elem(bpf_map__fd(timer_skel->maps.outer_arr), &key1);
+ ASSERT_EQ(err, 0, "delete inner map");
+
+ /* check that timer_cb[12] are no longer running */
+ cnt1 = READ_ONCE(timer_skel->bss->cnt);
+ for (int i = 0; i < 100; i++) {
+ usleep(200); /* 100 times more than interval */
+ cnt2 = READ_ONCE(timer_skel->bss->cnt);
+ if (cnt2 == cnt1)
+ break;
+ }
+ ASSERT_EQ(cnt2, cnt1, "cnt");
+
+ return 0;
+}
+
+void test_timer_mim(void)
+{
+ struct timer_mim_reject *timer_reject_skel = NULL;
+ libbpf_print_fn_t old_print_fn = NULL;
+ struct timer_mim *timer_skel = NULL;
+ int err;
+
+ old_print_fn = libbpf_set_print(NULL);
+ timer_reject_skel = timer_mim_reject__open_and_load();
+ libbpf_set_print(old_print_fn);
+ if (!ASSERT_ERR_PTR(timer_reject_skel, "timer_reject_skel_load"))
+ goto cleanup;
+
+ timer_skel = timer_mim__open_and_load();
+ if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
+ goto cleanup;
+
+ err = timer_mim(timer_skel);
+ ASSERT_OK(err, "timer_mim");
+cleanup:
+ timer_mim__destroy(timer_skel);
+ timer_mim_reject__destroy(timer_reject_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
index 39b0decb1bb2..d39bc00feb45 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
@@ -3,7 +3,7 @@
#include <test_progs.h>
-#include "trace_printk.skel.h"
+#include "trace_printk.lskel.h"
#define TRACEBUF "/sys/kernel/debug/tracing/trace_pipe"
#define SEARCHMSG "testing,testing"
@@ -21,6 +21,9 @@ void test_trace_printk(void)
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
return;
+ ASSERT_EQ(skel->rodata->fmt[0], 'T', "invalid printk fmt string");
+ skel->rodata->fmt[0] = 't';
+
err = trace_printk__load(skel);
if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index f3022d934e2d..d7f5a931d7f3 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -55,7 +55,7 @@ void test_trampoline_count(void)
/* attach 'allowed' trampoline programs */
for (i = 0; i < MAX_TRAMP_PROGS; i++) {
obj = bpf_object__open_file(object, NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+ if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
obj = NULL;
goto cleanup;
}
@@ -68,14 +68,14 @@ void test_trampoline_count(void)
if (rand() % 2) {
link = load(inst[i].obj, fentry_name);
- if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+ if (!ASSERT_OK_PTR(link, "attach_prog")) {
link = NULL;
goto cleanup;
}
inst[i].link_fentry = link;
} else {
link = load(inst[i].obj, fexit_name);
- if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+ if (!ASSERT_OK_PTR(link, "attach_prog")) {
link = NULL;
goto cleanup;
}
@@ -85,7 +85,7 @@ void test_trampoline_count(void)
/* and try 1 extra.. */
obj = bpf_object__open_file(object, NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+ if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
obj = NULL;
goto cleanup;
}
@@ -96,13 +96,15 @@ void test_trampoline_count(void)
/* ..that needs to fail */
link = load(obj, fentry_name);
- if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) {
+ err = libbpf_get_error(link);
+ if (!ASSERT_ERR_PTR(link, "cannot attach over the limit")) {
bpf_link__destroy(link);
goto cleanup_extra;
}
/* with E2BIG error */
- CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link));
+ ASSERT_EQ(err, -E2BIG, "proper error check");
+ ASSERT_EQ(link, NULL, "ptr_is_null");
/* and finaly execute the probe */
if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
diff --git a/tools/testing/selftests/bpf/prog_tests/udp_limit.c b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
index 2aba09d4d01b..56c9d6bd38a3 100644
--- a/tools/testing/selftests/bpf/prog_tests/udp_limit.c
+++ b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
@@ -22,11 +22,10 @@ void test_udp_limit(void)
goto close_cgroup_fd;
skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.sock, "cg_attach_sock"))
+ goto close_skeleton;
skel->links.sock_release = bpf_program__attach_cgroup(skel->progs.sock_release, cgroup_fd);
- if (CHECK(IS_ERR(skel->links.sock) || IS_ERR(skel->links.sock_release),
- "cg-attach", "sock %ld sock_release %ld",
- PTR_ERR(skel->links.sock),
- PTR_ERR(skel->links.sock_release)))
+ if (!ASSERT_OK_PTR(skel->links.sock_release, "cg_attach_sock_release"))
goto close_skeleton;
/* BPF program enforces a single UDP socket per cgroup,
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
new file mode 100644
index 000000000000..ad3ba81b4048
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -0,0 +1,574 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/**
+ * Test XDP bonding support
+ *
+ * Sets up two bonded veth pairs between two fresh namespaces
+ * and verifies that XDP_TX program loaded on a bond device
+ * are correctly loaded onto the slave devices and XDP_TX'd
+ * packets are balanced using bonding.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <net/if.h>
+#include <linux/if_link.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include <linux/if_bonding.h>
+#include <linux/limits.h>
+#include <linux/udp.h>
+
+#include "xdp_dummy.skel.h"
+#include "xdp_redirect_multi_kern.skel.h"
+#include "xdp_tx.skel.h"
+
+#define BOND1_MAC {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}
+#define BOND1_MAC_STR "00:11:22:33:44:55"
+#define BOND2_MAC {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}
+#define BOND2_MAC_STR "00:22:33:44:55:66"
+#define NPACKETS 100
+
+static int root_netns_fd = -1;
+
+static void restore_root_netns(void)
+{
+ ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "restore_root_netns");
+}
+
+static int setns_by_name(char *name)
+{
+ int nsfd, err;
+ char nspath[PATH_MAX];
+
+ snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+ nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+ if (nsfd < 0)
+ return -1;
+
+ err = setns(nsfd, CLONE_NEWNET);
+ close(nsfd);
+ return err;
+}
+
+static int get_rx_packets(const char *iface)
+{
+ FILE *f;
+ char line[512];
+ int iface_len = strlen(iface);
+
+ f = fopen("/proc/net/dev", "r");
+ if (!f)
+ return -1;
+
+ while (fgets(line, sizeof(line), f)) {
+ char *p = line;
+
+ while (*p == ' ')
+ p++; /* skip whitespace */
+ if (!strncmp(p, iface, iface_len)) {
+ p += iface_len;
+ if (*p++ != ':')
+ continue;
+ while (*p == ' ')
+ p++; /* skip whitespace */
+ while (*p && *p != ' ')
+ p++; /* skip rx bytes */
+ while (*p == ' ')
+ p++; /* skip whitespace */
+ fclose(f);
+ return atoi(p);
+ }
+ }
+ fclose(f);
+ return -1;
+}
+
+#define MAX_BPF_LINKS 8
+
+struct skeletons {
+ struct xdp_dummy *xdp_dummy;
+ struct xdp_tx *xdp_tx;
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+
+ int nlinks;
+ struct bpf_link *links[MAX_BPF_LINKS];
+};
+
+static int xdp_attach(struct skeletons *skeletons, struct bpf_program *prog, char *iface)
+{
+ struct bpf_link *link;
+ int ifindex;
+
+ ifindex = if_nametoindex(iface);
+ if (!ASSERT_GT(ifindex, 0, "get ifindex"))
+ return -1;
+
+ if (!ASSERT_LE(skeletons->nlinks+1, MAX_BPF_LINKS, "too many XDP programs attached"))
+ return -1;
+
+ link = bpf_program__attach_xdp(prog, ifindex);
+ if (!ASSERT_OK_PTR(link, "attach xdp program"))
+ return -1;
+
+ skeletons->links[skeletons->nlinks++] = link;
+ return 0;
+}
+
+enum {
+ BOND_ONE_NO_ATTACH = 0,
+ BOND_BOTH_AND_ATTACH,
+};
+
+static const char * const mode_names[] = {
+ [BOND_MODE_ROUNDROBIN] = "balance-rr",
+ [BOND_MODE_ACTIVEBACKUP] = "active-backup",
+ [BOND_MODE_XOR] = "balance-xor",
+ [BOND_MODE_BROADCAST] = "broadcast",
+ [BOND_MODE_8023AD] = "802.3ad",
+ [BOND_MODE_TLB] = "balance-tlb",
+ [BOND_MODE_ALB] = "balance-alb",
+};
+
+static const char * const xmit_policy_names[] = {
+ [BOND_XMIT_POLICY_LAYER2] = "layer2",
+ [BOND_XMIT_POLICY_LAYER34] = "layer3+4",
+ [BOND_XMIT_POLICY_LAYER23] = "layer2+3",
+ [BOND_XMIT_POLICY_ENCAP23] = "encap2+3",
+ [BOND_XMIT_POLICY_ENCAP34] = "encap3+4",
+};
+
+static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
+ int bond_both_attach)
+{
+#define SYS(fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_OK(system(cmd), cmd)) \
+ return -1; \
+ })
+
+ SYS("ip netns add ns_dst");
+ SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst");
+ SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst");
+
+ SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s",
+ mode_names[mode], xmit_policy_names[xmit_policy]);
+ SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none");
+ SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s",
+ mode_names[mode], xmit_policy_names[xmit_policy]);
+ SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none");
+
+ SYS("ip link set veth1_1 master bond1");
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+ SYS("ip link set veth1_2 master bond1");
+ } else {
+ SYS("ip link set veth1_2 up addrgenmode none");
+
+ if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2"))
+ return -1;
+ }
+
+ SYS("ip -netns ns_dst link set veth2_1 master bond2");
+
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH)
+ SYS("ip -netns ns_dst link set veth2_2 master bond2");
+ else
+ SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none");
+
+ /* Load a dummy program on sending side as with veth peer needs to have a
+ * XDP program loaded as well.
+ */
+ if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "bond1"))
+ return -1;
+
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+ if (!ASSERT_OK(setns_by_name("ns_dst"), "set netns to ns_dst"))
+ return -1;
+
+ if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond2"))
+ return -1;
+
+ restore_root_netns();
+ }
+
+ return 0;
+
+#undef SYS
+}
+
+static void bonding_cleanup(struct skeletons *skeletons)
+{
+ restore_root_netns();
+ while (skeletons->nlinks) {
+ skeletons->nlinks--;
+ bpf_link__destroy(skeletons->links[skeletons->nlinks]);
+ }
+ ASSERT_OK(system("ip link delete bond1"), "delete bond1");
+ ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1");
+ ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2");
+ ASSERT_OK(system("ip netns delete ns_dst"), "delete ns_dst");
+}
+
+static int send_udp_packets(int vary_dst_ip)
+{
+ struct ethhdr eh = {
+ .h_source = BOND1_MAC,
+ .h_dest = BOND2_MAC,
+ .h_proto = htons(ETH_P_IP),
+ };
+ uint8_t buf[128] = {};
+ struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh));
+ struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph));
+ int i, s = -1;
+ int ifindex;
+
+ s = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (!ASSERT_GE(s, 0, "socket"))
+ goto err;
+
+ ifindex = if_nametoindex("bond1");
+ if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex"))
+ goto err;
+
+ memcpy(buf, &eh, sizeof(eh));
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->tos = 16;
+ iph->id = 1;
+ iph->ttl = 64;
+ iph->protocol = IPPROTO_UDP;
+ iph->saddr = 1;
+ iph->daddr = 2;
+ iph->tot_len = htons(sizeof(buf) - ETH_HLEN);
+ iph->check = 0;
+
+ for (i = 1; i <= NPACKETS; i++) {
+ int n;
+ struct sockaddr_ll saddr_ll = {
+ .sll_ifindex = ifindex,
+ .sll_halen = ETH_ALEN,
+ .sll_addr = BOND2_MAC,
+ };
+
+ /* vary the UDP destination port for even distribution with roundrobin/xor modes */
+ uh->dest++;
+
+ if (vary_dst_ip)
+ iph->daddr++;
+
+ n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll));
+ if (!ASSERT_EQ(n, sizeof(buf), "sendto"))
+ goto err;
+ }
+
+ return 0;
+
+err:
+ if (s >= 0)
+ close(s);
+ return -1;
+}
+
+static void test_xdp_bonding_with_mode(struct skeletons *skeletons, int mode, int xmit_policy)
+{
+ int bond1_rx;
+
+ if (bonding_setup(skeletons, mode, xmit_policy, BOND_BOTH_AND_ATTACH))
+ goto out;
+
+ if (send_udp_packets(xmit_policy != BOND_XMIT_POLICY_LAYER34))
+ goto out;
+
+ bond1_rx = get_rx_packets("bond1");
+ ASSERT_EQ(bond1_rx, NPACKETS, "expected more received packets");
+
+ switch (mode) {
+ case BOND_MODE_ROUNDROBIN:
+ case BOND_MODE_XOR: {
+ int veth1_rx = get_rx_packets("veth1_1");
+ int veth2_rx = get_rx_packets("veth1_2");
+ int diff = abs(veth1_rx - veth2_rx);
+
+ ASSERT_GE(veth1_rx + veth2_rx, NPACKETS, "expected more packets");
+
+ switch (xmit_policy) {
+ case BOND_XMIT_POLICY_LAYER2:
+ ASSERT_GE(diff, NPACKETS,
+ "expected packets on only one of the interfaces");
+ break;
+ case BOND_XMIT_POLICY_LAYER23:
+ case BOND_XMIT_POLICY_LAYER34:
+ ASSERT_LT(diff, NPACKETS/2,
+ "expected even distribution of packets");
+ break;
+ default:
+ PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+ break;
+ }
+ break;
+ }
+ case BOND_MODE_ACTIVEBACKUP: {
+ int veth1_rx = get_rx_packets("veth1_1");
+ int veth2_rx = get_rx_packets("veth1_2");
+ int diff = abs(veth1_rx - veth2_rx);
+
+ ASSERT_GE(diff, NPACKETS,
+ "expected packets on only one of the interfaces");
+ break;
+ }
+ default:
+ PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+ break;
+ }
+
+out:
+ bonding_cleanup(skeletons);
+}
+
+/* Test the broadcast redirection using xdp_redirect_map_multi_prog and adding
+ * all the interfaces to it and checking that broadcasting won't send the packet
+ * to neither the ingress bond device (bond2) or its slave (veth2_1).
+ */
+static void test_xdp_bonding_redirect_multi(struct skeletons *skeletons)
+{
+ static const char * const ifaces[] = {"bond2", "veth2_1", "veth2_2"};
+ int veth1_1_rx, veth1_2_rx;
+ int err;
+
+ if (bonding_setup(skeletons, BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23,
+ BOND_ONE_NO_ATTACH))
+ goto out;
+
+
+ if (!ASSERT_OK(setns_by_name("ns_dst"), "could not set netns to ns_dst"))
+ goto out;
+
+ /* populate the devmap with the relevant interfaces */
+ for (int i = 0; i < ARRAY_SIZE(ifaces); i++) {
+ int ifindex = if_nametoindex(ifaces[i]);
+ int map_fd = bpf_map__fd(skeletons->xdp_redirect_multi_kern->maps.map_all);
+
+ if (!ASSERT_GT(ifindex, 0, "could not get interface index"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &ifindex, &ifindex, 0);
+ if (!ASSERT_OK(err, "add interface to map_all"))
+ goto out;
+ }
+
+ if (xdp_attach(skeletons,
+ skeletons->xdp_redirect_multi_kern->progs.xdp_redirect_map_multi_prog,
+ "bond2"))
+ goto out;
+
+ restore_root_netns();
+
+ if (send_udp_packets(BOND_MODE_ROUNDROBIN))
+ goto out;
+
+ veth1_1_rx = get_rx_packets("veth1_1");
+ veth1_2_rx = get_rx_packets("veth1_2");
+
+ ASSERT_EQ(veth1_1_rx, 0, "expected no packets on veth1_1");
+ ASSERT_GE(veth1_2_rx, NPACKETS, "expected packets on veth1_2");
+
+out:
+ restore_root_netns();
+ bonding_cleanup(skeletons);
+}
+
+/* Test that XDP programs cannot be attached to both the bond master and slaves simultaneously */
+static void test_xdp_bonding_attach(struct skeletons *skeletons)
+{
+ struct bpf_link *link = NULL;
+ struct bpf_link *link2 = NULL;
+ int veth, bond, err;
+
+ if (!ASSERT_OK(system("ip link add veth type veth"), "add veth"))
+ goto out;
+ if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+ goto out;
+
+ veth = if_nametoindex("veth");
+ if (!ASSERT_GE(veth, 0, "if_nametoindex veth"))
+ goto out;
+ bond = if_nametoindex("bond");
+ if (!ASSERT_GE(bond, 0, "if_nametoindex bond"))
+ goto out;
+
+ /* enslaving with a XDP program loaded is allowed */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+ if (!ASSERT_OK_PTR(link, "attach program to veth"))
+ goto out;
+
+ err = system("ip link set veth master bond");
+ if (!ASSERT_OK(err, "set veth master"))
+ goto out;
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ /* attaching to slave when master has no program is allowed */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+ if (!ASSERT_OK_PTR(link, "attach program to slave when enslaved"))
+ goto out;
+
+ /* attaching to master not allowed when slave has program loaded */
+ link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ if (!ASSERT_ERR_PTR(link2, "attach program to master when slave has program"))
+ goto out;
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ /* attaching XDP program to master allowed when slave has no program */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ if (!ASSERT_OK_PTR(link, "attach program to master"))
+ goto out;
+
+ /* attaching to slave not allowed when master has program loaded */
+ link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+ if (!ASSERT_ERR_PTR(link2, "attach program to slave when master has program"))
+ goto out;
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ /* test program unwinding with a non-XDP slave */
+ if (!ASSERT_OK(system("ip link add vxlan type vxlan id 1 remote 1.2.3.4 dstport 0 dev lo"),
+ "add vxlan"))
+ goto out;
+
+ err = system("ip link set vxlan master bond");
+ if (!ASSERT_OK(err, "set vxlan master"))
+ goto out;
+
+ /* attaching not allowed when one slave does not support XDP */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ if (!ASSERT_ERR_PTR(link, "attach program to master when slave does not support XDP"))
+ goto out;
+
+out:
+ bpf_link__destroy(link);
+ bpf_link__destroy(link2);
+
+ system("ip link del veth");
+ system("ip link del bond");
+ system("ip link del vxlan");
+}
+
+/* Test with nested bonding devices to catch issue with negative jump label count */
+static void test_xdp_bonding_nested(struct skeletons *skeletons)
+{
+ struct bpf_link *link = NULL;
+ int bond, err;
+
+ if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+ goto out;
+
+ bond = if_nametoindex("bond");
+ if (!ASSERT_GE(bond, 0, "if_nametoindex bond"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link add bond_nest1 type bond"), "add bond_nest1"))
+ goto out;
+
+ err = system("ip link set bond_nest1 master bond");
+ if (!ASSERT_OK(err, "set bond_nest1 master"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link add bond_nest2 type bond"), "add bond_nest1"))
+ goto out;
+
+ err = system("ip link set bond_nest2 master bond_nest1");
+ if (!ASSERT_OK(err, "set bond_nest2 master"))
+ goto out;
+
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ ASSERT_OK_PTR(link, "attach program to master");
+
+out:
+ bpf_link__destroy(link);
+ system("ip link del bond");
+ system("ip link del bond_nest1");
+ system("ip link del bond_nest2");
+}
+
+static int libbpf_debug_print(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ if (level != LIBBPF_WARN)
+ vprintf(format, args);
+ return 0;
+}
+
+struct bond_test_case {
+ char *name;
+ int mode;
+ int xmit_policy;
+};
+
+static struct bond_test_case bond_test_cases[] = {
+ { "xdp_bonding_roundrobin", BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, },
+ { "xdp_bonding_activebackup", BOND_MODE_ACTIVEBACKUP, BOND_XMIT_POLICY_LAYER23 },
+
+ { "xdp_bonding_xor_layer2", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER2, },
+ { "xdp_bonding_xor_layer23", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER23, },
+ { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, },
+};
+
+void test_xdp_bonding(void)
+{
+ libbpf_print_fn_t old_print_fn;
+ struct skeletons skeletons = {};
+ int i;
+
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+ root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+ goto out;
+
+ skeletons.xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load"))
+ goto out;
+
+ skeletons.xdp_tx = xdp_tx__open_and_load();
+ if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load"))
+ goto out;
+
+ skeletons.xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skeletons.xdp_redirect_multi_kern,
+ "xdp_redirect_multi_kern__open_and_load"))
+ goto out;
+
+ if (test__start_subtest("xdp_bonding_attach"))
+ test_xdp_bonding_attach(&skeletons);
+
+ if (test__start_subtest("xdp_bonding_nested"))
+ test_xdp_bonding_nested(&skeletons);
+
+ for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) {
+ struct bond_test_case *test_case = &bond_test_cases[i];
+
+ if (test__start_subtest(test_case->name))
+ test_xdp_bonding_with_mode(
+ &skeletons,
+ test_case->mode,
+ test_case->xmit_policy);
+ }
+
+ if (test__start_subtest("xdp_bonding_redirect_multi"))
+ test_xdp_bonding_redirect_multi(&skeletons);
+
+out:
+ xdp_dummy__destroy(skeletons.xdp_dummy);
+ xdp_tx__destroy(skeletons.xdp_tx);
+ xdp_redirect_multi_kern__destroy(skeletons.xdp_redirect_multi_kern);
+
+ libbpf_set_print(old_print_fn);
+ if (root_netns_fd >= 0)
+ close(root_netns_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index 2c6c570b21f8..3bd5904b4db5 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -90,7 +90,7 @@ void test_xdp_bpf2bpf(void)
pb_opts.ctx = &passed;
pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
1, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto out;
/* Run test program */
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
new file mode 100644
index 000000000000..ab4952b9fb1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_xdp_context_test_run.skel.h"
+
+void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
+ __u32 data_meta, __u32 data, __u32 data_end,
+ __u32 ingress_ifindex, __u32 rx_queue_index,
+ __u32 egress_ifindex)
+{
+ struct xdp_md ctx = {
+ .data = data,
+ .data_end = data_end,
+ .data_meta = data_meta,
+ .ingress_ifindex = ingress_ifindex,
+ .rx_queue_index = rx_queue_index,
+ .egress_ifindex = egress_ifindex,
+ };
+ int err;
+
+ opts.ctx_in = &ctx;
+ opts.ctx_size_in = sizeof(ctx);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_EQ(errno, EINVAL, "errno-EINVAL");
+ ASSERT_ERR(err, "bpf_prog_test_run");
+}
+
+void test_xdp_context_test_run(void)
+{
+ struct test_xdp_context_test_run *skel = NULL;
+ char data[sizeof(pkt_v4) + sizeof(__u32)];
+ char bad_ctx[sizeof(struct xdp_md) + 1];
+ struct xdp_md ctx_in, ctx_out;
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .data_size_in = sizeof(data),
+ .ctx_out = &ctx_out,
+ .ctx_size_out = sizeof(ctx_out),
+ .repeat = 1,
+ );
+ int err, prog_fd;
+
+ skel = test_xdp_context_test_run__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+ prog_fd = bpf_program__fd(skel->progs.xdp_context);
+
+ /* Data past the end of the kernel's struct xdp_md must be 0 */
+ bad_ctx[sizeof(bad_ctx) - 1] = 1;
+ opts.ctx_in = bad_ctx;
+ opts.ctx_size_in = sizeof(bad_ctx);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_EQ(errno, E2BIG, "extradata-errno");
+ ASSERT_ERR(err, "bpf_prog_test_run(extradata)");
+
+ *(__u32 *)data = XDP_PASS;
+ *(struct ipv4_packet *)(data + sizeof(__u32)) = pkt_v4;
+ opts.ctx_in = &ctx_in;
+ opts.ctx_size_in = sizeof(ctx_in);
+ memset(&ctx_in, 0, sizeof(ctx_in));
+ ctx_in.data_meta = 0;
+ ctx_in.data = sizeof(__u32);
+ ctx_in.data_end = ctx_in.data + sizeof(pkt_v4);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(err, "bpf_prog_test_run(valid)");
+ ASSERT_EQ(opts.retval, XDP_PASS, "valid-retval");
+ ASSERT_EQ(opts.data_size_out, sizeof(pkt_v4), "valid-datasize");
+ ASSERT_EQ(opts.ctx_size_out, opts.ctx_size_in, "valid-ctxsize");
+ ASSERT_EQ(ctx_out.data_meta, 0, "valid-datameta");
+ ASSERT_EQ(ctx_out.data, 0, "valid-data");
+ ASSERT_EQ(ctx_out.data_end, sizeof(pkt_v4), "valid-dataend");
+
+ /* Meta data's size must be a multiple of 4 */
+ test_xdp_context_error(prog_fd, opts, 0, 1, sizeof(data), 0, 0, 0);
+
+ /* data_meta must reference the start of data */
+ test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data),
+ 0, 0, 0);
+
+ /* Meta data must be 32 bytes or smaller */
+ test_xdp_context_error(prog_fd, opts, 0, 36, sizeof(data), 0, 0, 0);
+
+ /* Total size of data must match data_end - data_meta */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
+ sizeof(data) - 1, 0, 0, 0);
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
+ sizeof(data) + 1, 0, 0, 0);
+
+ /* RX queue cannot be specified without specifying an ingress */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+ 0, 1, 0);
+
+ /* Interface 1 is always the loopback interface which always has only
+ * one RX queue (index 0). This makes index 1 an invalid rx queue index
+ * for interface 1.
+ */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+ 1, 1, 0);
+
+ /* The egress cannot be specified */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+ 0, 0, 1);
+
+ test_xdp_context_test_run__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index 0176573fe4e7..8755effd80b0 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -7,64 +7,53 @@
#define IFINDEX_LO 1
-void test_xdp_with_cpumap_helpers(void)
+void test_xdp_cpumap_attach(void)
{
struct test_xdp_with_cpumap_helpers *skel;
struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
struct bpf_cpumap_val val = {
.qsize = 192,
};
- __u32 duration = 0, idx = 0;
- __u32 len = sizeof(info);
int err, prog_fd, map_fd;
+ __u32 idx = 0;
skel = test_xdp_with_cpumap_helpers__open_and_load();
- if (CHECK_FAIL(!skel)) {
- perror("test_xdp_with_cpumap_helpers__open_and_load");
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load"))
return;
- }
- /* can not attach program with cpumaps that allow programs
- * as xdp generic
- */
prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP",
- "should have failed\n");
+ if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP"))
+ goto out_close;
+
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+ ASSERT_OK(err, "XDP program detach");
prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
map_fd = bpf_map__fd(skel->maps.cpu_map);
err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
goto out_close;
val.bpf_prog.fd = prog_fd;
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err, "Add program to cpumap entry", "err %d errno %d\n",
- err, errno);
+ ASSERT_OK(err, "Add program to cpumap entry");
err = bpf_map_lookup_elem(map_fd, &idx, &val);
- CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno);
- CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry",
- "expected %u read %u\n", info.id, val.bpf_prog.id);
+ ASSERT_OK(err, "Read cpumap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
/* can not attach BPF_XDP_CPUMAP program to a device */
err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program",
- "should have failed\n");
+ if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program"))
+ bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
val.qsize = 192;
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry",
- "should have failed\n");
+ ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
out_close:
test_xdp_with_cpumap_helpers__destroy(skel);
}
-
-void test_xdp_cpumap_attach(void)
-{
- if (test__start_subtest("cpumap_with_progs"))
- test_xdp_with_cpumap_helpers();
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 88ef3ec8ac4c..c72af030ff10 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -16,50 +16,45 @@ void test_xdp_with_devmap_helpers(void)
.ifindex = IFINDEX_LO,
};
__u32 len = sizeof(info);
- __u32 duration = 0, idx = 0;
int err, dm_fd, map_fd;
+ __u32 idx = 0;
skel = test_xdp_with_devmap_helpers__open_and_load();
- if (CHECK_FAIL(!skel)) {
- perror("test_xdp_with_devmap_helpers__open_and_load");
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load"))
return;
- }
- /* can not attach program with DEVMAPs that allow programs
- * as xdp generic
- */
dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Generic attach of program with 8-byte devmap",
- "should have failed\n");
+ if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap"))
+ goto out_close;
+
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+ ASSERT_OK(err, "XDP program detach");
dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
map_fd = bpf_map__fd(skel->maps.dm_ports);
err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
goto out_close;
val.bpf_prog.fd = dm_fd;
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err, "Add program to devmap entry",
- "err %d errno %d\n", err, errno);
+ ASSERT_OK(err, "Add program to devmap entry");
err = bpf_map_lookup_elem(map_fd, &idx, &val);
- CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno);
- CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry",
- "expected %u read %u\n", info.id, val.bpf_prog.id);
+ ASSERT_OK(err, "Read devmap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id");
/* can not attach BPF_XDP_DEVMAP program to a device */
err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program",
- "should have failed\n");
+ if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program"))
+ bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
val.ifindex = 1;
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry",
- "should have failed\n");
+ ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry");
out_close:
test_xdp_with_devmap_helpers__destroy(skel);
@@ -68,12 +63,10 @@ out_close:
void test_neg_xdp_devmap_helpers(void)
{
struct test_xdp_devmap_helpers *skel;
- __u32 duration = 0;
skel = test_xdp_devmap_helpers__open_and_load();
- if (CHECK(skel,
- "Load of XDP program accessing egress ifindex without attach type",
- "should have failed\n")) {
+ if (!ASSERT_EQ(skel, NULL,
+ "Load of XDP program accessing egress ifindex without attach type")) {
test_xdp_devmap_helpers__destroy(skel);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index 6f814999b395..46eed0a33c23 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -51,7 +51,7 @@ void test_xdp_link(void)
/* BPF link is not allowed to replace prog attachment */
link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
bpf_link__destroy(link);
/* best-effort detach prog */
opts.old_fd = prog_fd1;
@@ -67,7 +67,7 @@ void test_xdp_link(void)
/* now BPF link should attach successfully */
link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto cleanup;
skel1->links.xdp_handler = link;
@@ -95,7 +95,7 @@ void test_xdp_link(void)
/* BPF link is not allowed to replace another BPF link */
link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
bpf_link__destroy(link);
goto cleanup;
}
@@ -105,7 +105,7 @@ void test_xdp_link(void)
/* new link attach should succeed */
link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
- if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto cleanup;
skel2->links.xdp_handler = link;
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index fd42247da8b4..9573be6122be 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -17,6 +17,11 @@
char _license[] SEC("license") = "GPL";
+volatile const char fallback[TCP_CA_NAME_MAX];
+const char bpf_dctcp[] = "bpf_dctcp";
+const char tcp_cdg[] = "cdg";
+char cc_res[TCP_CA_NAME_MAX];
+int tcp_cdg_res = 0;
int stg_result = 0;
struct {
@@ -57,6 +62,26 @@ void BPF_PROG(dctcp_init, struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
int *stg;
+ if (!(tp->ecn_flags & TCP_ECN_OK) && fallback[0]) {
+ /* Switch to fallback */
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)fallback, sizeof(fallback));
+ /* Switch back to myself which the bpf trampoline
+ * stopped calling dctcp_init recursively.
+ */
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)bpf_dctcp, sizeof(bpf_dctcp));
+ /* Switch back to fallback */
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)fallback, sizeof(fallback));
+ /* Expecting -ENOTSUPP for tcp_cdg_res */
+ tcp_cdg_res = bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)tcp_cdg, sizeof(tcp_cdg));
+ bpf_getsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)cc_res, sizeof(cc_res));
+ return;
+ }
+
ca->prior_rcv_nxt = tp->rcv_nxt;
ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
ca->loss_cwnd = 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
new file mode 100644
index 000000000000..d836f7c372f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+const char cubic[] = "cubic";
+
+void BPF_STRUCT_OPS(dctcp_nouse_release, struct sock *sk)
+{
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)cubic, sizeof(cubic));
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops dctcp_rel = {
+ .release = (void *)dctcp_nouse_release,
+ .name = "bpf_dctcp_rel",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index 3d83b185c4bc..8cfaeba1ddbf 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -12,6 +12,7 @@
#define tcp6_sock tcp6_sock___not_used
#define bpf_iter__udp bpf_iter__udp___not_used
#define udp6_sock udp6_sock___not_used
+#define bpf_iter__unix bpf_iter__unix___not_used
#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
#define bpf_iter__sockmap bpf_iter__sockmap___not_used
@@ -32,6 +33,7 @@
#undef tcp6_sock
#undef bpf_iter__udp
#undef udp6_sock
+#undef bpf_iter__unix
#undef bpf_iter__bpf_map_elem
#undef bpf_iter__bpf_sk_storage_map
#undef bpf_iter__sockmap
@@ -103,6 +105,12 @@ struct udp6_sock {
struct ipv6_pinfo inet6;
} __attribute__((preserve_access_index));
+struct bpf_iter__unix {
+ struct bpf_iter_meta *meta;
+ struct unix_sock *unix_sk;
+ uid_t uid;
+} __attribute__((preserve_access_index));
+
struct bpf_iter__bpf_map_elem {
struct bpf_iter_meta *meta;
struct bpf_map *map;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
index 6dfce3fd68bc..0aa3cd34cbe3 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index b83b5d2e17dc..6c39e86b666f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
index d58d9f1642b5..784a610ce039 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index 95989f4c99b5..a28e51e2dcee 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
new file mode 100644
index 000000000000..b77adfd55d73
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define bpf_tcp_sk(skc) ({ \
+ struct sock_common *_skc = skc; \
+ sk = NULL; \
+ tp = NULL; \
+ if (_skc) { \
+ tp = bpf_skc_to_tcp_sock(_skc); \
+ sk = (struct sock *)tp; \
+ } \
+ tp; \
+})
+
+unsigned short reuse_listen_hport = 0;
+unsigned short listen_hport = 0;
+char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic";
+char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp";
+bool random_retry = false;
+
+static bool tcp_cc_eq(const char *a, const char *b)
+{
+ int i;
+
+ for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+ if (a[i] != b[i])
+ return false;
+ if (!a[i])
+ break;
+ }
+
+ return true;
+}
+
+SEC("iter/tcp")
+int change_tcp_cc(struct bpf_iter__tcp *ctx)
+{
+ char cur_cc[TCP_CA_NAME_MAX];
+ struct tcp_sock *tp;
+ struct sock *sk;
+ int ret;
+
+ if (!bpf_tcp_sk(ctx->sk_common))
+ return 0;
+
+ if (sk->sk_family != AF_INET6 ||
+ (sk->sk_state != TCP_LISTEN &&
+ sk->sk_state != TCP_ESTABLISHED) ||
+ (sk->sk_num != reuse_listen_hport &&
+ sk->sk_num != listen_hport &&
+ bpf_ntohs(sk->sk_dport) != listen_hport))
+ return 0;
+
+ if (bpf_getsockopt(tp, SOL_TCP, TCP_CONGESTION,
+ cur_cc, sizeof(cur_cc)))
+ return 0;
+
+ if (!tcp_cc_eq(cur_cc, cubic_cc))
+ return 0;
+
+ if (random_retry && bpf_get_prandom_u32() % 4 == 1)
+ return 1;
+
+ bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, dctcp_cc, sizeof(dctcp_cc));
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index b7f32c160f4e..c86b93f33b32 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
index a1ddc36f13ec..bca8b889cb10 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020, Oracle and/or its affiliates. */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include <errno.h>
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index b2f7c7c5f952..6e7b400888fe 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
index 43c36f5f7649..f2b8167b72a8 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
index 11d1aa37cf11..4ea6a37d1345 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2020 Facebook */
#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
index 54380c5e1069..92267abb462f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
@@ -122,7 +121,7 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
}
BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
- seq_num, src, srcp, destp, destp);
+ seq_num, src, srcp, dest, destp);
BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
state,
tp->write_seq - tp->snd_una, rx_queue,
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
index b4fbddfa4e10..943f7bba180e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
index ee49493dc125..400fdf8d6233 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -9,8 +9,8 @@ __u32 map1_id = 0, map2_id = 0;
__u32 map1_accessed = 0, map2_accessed = 0;
__u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
-static volatile const __u32 print_len;
-static volatile const __u32 ret1;
+volatile const __u32 print_len;
+volatile const __u32 ret1;
SEC("iter/bpf_map")
int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
index f258583afbbd..cf0c485b1ed7 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
index 65f93bb03f0f..5031e21c433f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -3,7 +3,6 @@
#include "bpf_iter.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include <bpf/bpf_endian.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
new file mode 100644
index 000000000000..94423902685d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+SEC("iter/unix")
+int dump_unix(struct bpf_iter__unix *ctx)
+{
+ struct unix_sock *unix_sk = ctx->unix_sk;
+ struct sock *sk = (struct sock *)unix_sk;
+ struct seq_file *seq;
+ __u32 seq_num;
+
+ if (!unix_sk)
+ return 0;
+
+ seq = ctx->meta->seq;
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "Num RefCount Protocol Flags Type St Inode Path\n");
+
+ BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %8lu",
+ unix_sk,
+ sk->sk_refcnt.refs.counter,
+ 0,
+ sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
+ sk->sk_type,
+ sk->sk_socket ?
+ (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
+ (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
+ sock_i_ino(sk));
+
+ if (unix_sk->addr) {
+ if (!UNIX_ABSTRACT(unix_sk)) {
+ BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
+ } else {
+ /* The name of the abstract UNIX domain socket starts
+ * with '\0' and can contain '\0'. The null bytes
+ * should be escaped as done in unix_seq_show().
+ */
+ __u64 i, len;
+
+ len = unix_sk->addr->len - sizeof(short);
+
+ BPF_SEQ_PRINTF(seq, " @");
+
+ for (i = 1; i < len; i++) {
+ /* unix_mkname() tests this upper bound. */
+ if (i >= sizeof(struct sockaddr_un))
+ break;
+
+ BPF_SEQ_PRINTF(seq, "%c",
+ unix_sk->addr->name->sun_path[i] ?:
+ '@');
+ }
+ }
+ }
+
+ BPF_SEQ_PRINTF(seq, "\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 01378911252b..eef5646ddb19 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -5,6 +5,14 @@
#define AF_INET 2
#define AF_INET6 10
+#define __SO_ACCEPTCON (1 << 16)
+#define UNIX_HASH_SIZE 256
+#define UNIX_ABSTRACT(unix_sk) (unix_sk->addr->hash < UNIX_HASH_SIZE)
+
+#define SOL_TCP 6
+#define TCP_CONGESTION 13
+#define TCP_CA_NAME_MAX 16
+
#define ICSK_TIME_RETRANS 1
#define ICSK_TIME_PROBE0 3
#define ICSK_TIME_LOSS_PROBE 5
@@ -32,6 +40,8 @@
#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr
#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr
+#define sk_num __sk_common.skc_num
+#define sk_dport __sk_common.skc_dport
#define sk_family __sk_common.skc_family
#define sk_rmem_alloc sk_backlog.rmem_alloc
#define sk_refcnt __sk_common.skc_refcnt
diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c
new file mode 100644
index 000000000000..b565d997810a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define VERDICT_REJECT 0
+#define VERDICT_PROCEED 1
+
+SEC("cgroup/connect4")
+int connect_v4_dropper(struct bpf_sock_addr *ctx)
+{
+ if (ctx->type != SOCK_STREAM)
+ return VERDICT_PROCEED;
+ if (ctx->user_port == bpf_htons(60123))
+ return VERDICT_REJECT;
+ return VERDICT_PROCEED;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
new file mode 100644
index 000000000000..a587aeca5ae0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+extern const void bpf_fentry_test1 __ksym;
+extern const void bpf_fentry_test2 __ksym;
+extern const void bpf_fentry_test3 __ksym;
+extern const void bpf_fentry_test4 __ksym;
+extern const void bpf_modify_return_test __ksym;
+extern const void bpf_fentry_test6 __ksym;
+extern const void bpf_fentry_test7 __ksym;
+
+__u64 test1_result = 0;
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test1_result = (const void *) addr == &bpf_fentry_test1;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fexit/bpf_fentry_test2")
+int BPF_PROG(test2, int a)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test2_result = (const void *) addr == &bpf_fentry_test2;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("kprobe/bpf_fentry_test3")
+int test3(struct pt_regs *ctx)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test3_result = (const void *) addr == &bpf_fentry_test3;
+ return 0;
+}
+
+__u64 test4_result = 0;
+SEC("kretprobe/bpf_fentry_test4")
+int BPF_KRETPROBE(test4)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test4_result = (const void *) addr == &bpf_fentry_test4;
+ return 0;
+}
+
+__u64 test5_result = 0;
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(test5, int a, int *b, int ret)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test5_result = (const void *) addr == &bpf_modify_return_test;
+ return ret;
+}
+
+__u64 test6_result = 0;
+SEC("kprobe/bpf_fentry_test6+0x5")
+int test6(struct pt_regs *ctx)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test6_result = (const void *) addr == &bpf_fentry_test6 + 5;
+ return 0;
+}
+
+__u64 test7_result = 0;
+SEC("kprobe/bpf_fentry_test7+5")
+int test7(struct pt_regs *ctx)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test7_result = (const void *) addr == &bpf_fentry_test7 + 5;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c
index a46a264ce24e..55e283050cab 100644
--- a/tools/testing/selftests/bpf/progs/kfree_skb.c
+++ b/tools/testing/selftests/bpf/progs/kfree_skb.c
@@ -109,10 +109,10 @@ int BPF_PROG(trace_kfree_skb, struct sk_buff *skb, void *location)
return 0;
}
-static volatile struct {
+struct {
bool fentry_test_ok;
bool fexit_test_ok;
-} result;
+} result = {};
SEC("fentry/eth_type_trans")
int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb, struct net_device *dev,
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
index b2dcb7d9cb03..5fbd9e232d44 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
@@ -9,7 +9,7 @@ extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
__u32 c, __u64 d) __ksym;
extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
int active_res = -1;
-int sk_state = -1;
+int sk_state_res = -1;
int __noinline f1(struct __sk_buff *skb)
{
@@ -28,7 +28,7 @@ int __noinline f1(struct __sk_buff *skb)
if (active)
active_res = *active;
- sk_state = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
+ sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->sk_state;
return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
}
diff --git a/tools/testing/selftests/bpf/progs/linked_maps1.c b/tools/testing/selftests/bpf/progs/linked_maps1.c
index 52291515cc72..00bf1ca95986 100644
--- a/tools/testing/selftests/bpf/progs/linked_maps1.c
+++ b/tools/testing/selftests/bpf/progs/linked_maps1.c
@@ -75,7 +75,7 @@ int BPF_PROG(handler_exit1)
val = bpf_map_lookup_elem(&map_weak, &key);
if (val)
output_weak1 = *val;
-
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c
index d071adf178bd..43649bce4c54 100644
--- a/tools/testing/selftests/bpf/progs/netcnt_prog.c
+++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c
@@ -13,21 +13,21 @@
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
__type(key, struct bpf_cgroup_storage_key);
- __type(value, struct percpu_net_cnt);
+ __type(value, union percpu_net_cnt);
} percpu_netcnt SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
__type(key, struct bpf_cgroup_storage_key);
- __type(value, struct net_cnt);
+ __type(value, union net_cnt);
} netcnt SEC(".maps");
SEC("cgroup/skb")
int bpf_nextcnt(struct __sk_buff *skb)
{
- struct percpu_net_cnt *percpu_cnt;
+ union percpu_net_cnt *percpu_cnt;
char fmt[] = "%d %llu %llu\n";
- struct net_cnt *cnt;
+ union net_cnt *cnt;
__u64 ts, dt;
int ret;
diff --git a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
new file mode 100644
index 000000000000..aeff3a4f9287
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+#define AF_INET6 10
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sockops_netns_cookies SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_msg_netns_cookies SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sockops")
+int get_netns_cookie_sockops(struct bpf_sock_ops *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ int *cookie;
+ __u32 key = 0;
+
+ if (ctx->family != AF_INET6)
+ return 1;
+
+ if (!sk)
+ return 1;
+
+ switch (ctx->op) {
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ cookie = bpf_sk_storage_get(&sockops_netns_cookies, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!cookie)
+ return 1;
+
+ *cookie = bpf_get_netns_cookie(ctx);
+ break;
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ bpf_sock_map_update(ctx, &sock_map, &key, BPF_NOEXIST);
+ break;
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+SEC("sk_msg")
+int get_netns_cookie_sk_msg(struct sk_msg_md *msg)
+{
+ struct bpf_sock *sk = msg->sk;
+ int *cookie;
+
+ if (msg->family != AF_INET6)
+ return 1;
+
+ if (!sk)
+ return 1;
+
+ cookie = bpf_sk_storage_get(&sk_msg_netns_cookies, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!cookie)
+ return 1;
+
+ *cookie = bpf_get_netns_cookie(msg);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
new file mode 100644
index 000000000000..1bce83b6e3a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <string.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("cgroup/setsockopt")
+int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
+{
+ void *optval_end = ctx->optval_end;
+ int *optval = ctx->optval;
+ char buf[TCP_CA_NAME_MAX];
+ char cc_reno[TCP_CA_NAME_MAX] = "reno";
+ char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
+
+ if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS)
+ return 1;
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ if (bpf_getsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
+ return 0;
+
+ if (!tcp_cc_eq(buf, cc_cubic))
+ return 0;
+
+ if (*optval == 0x2d) {
+ if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &cc_reno,
+ sizeof(cc_reno)))
+ return 0;
+ }
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index 8acdb99b5959..79c8139b63b8 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -33,6 +33,14 @@ int _getsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
+ /* Make sure bpf_get_netns_cookie is callable.
+ */
+ if (bpf_get_netns_cookie(NULL) == 0)
+ return 0;
+
+ if (bpf_get_netns_cookie(ctx) == 0)
+ return 0;
+
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel
@@ -123,6 +131,14 @@ int _setsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
+ /* Make sure bpf_get_netns_cookie is callable.
+ */
+ if (bpf_get_netns_cookie(NULL) == 0)
+ return 0;
+
+ if (bpf_get_netns_cookie(ctx) == 0)
+ return 0;
+
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel
diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c
new file mode 100644
index 000000000000..e550f728962d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/syscall.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <../../../tools/include/linux/filter.h>
+#include <linux/btf.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct args {
+ __u64 log_buf;
+ __u32 log_size;
+ int max_entries;
+ int map_fd;
+ int prog_fd;
+ int btf_fd;
+};
+
+#define BTF_INFO_ENC(kind, kind_flag, vlen) \
+ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
+ ((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
+ BTF_INT_ENC(encoding, bits_offset, bits)
+
+static int btf_load(void)
+{
+ struct btf_blob {
+ struct btf_header btf_hdr;
+ __u32 types[8];
+ __u32 str;
+ } raw_btf = {
+ .btf_hdr = {
+ .magic = BTF_MAGIC,
+ .version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
+ .type_len = sizeof(__u32) * 8,
+ .str_off = sizeof(__u32) * 8,
+ .str_len = sizeof(__u32),
+ },
+ .types = {
+ /* long */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [1] */
+ /* unsigned long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ },
+ };
+ static union bpf_attr btf_load_attr = {
+ .btf_size = sizeof(raw_btf),
+ };
+
+ btf_load_attr.btf = (long)&raw_btf;
+ return bpf_sys_bpf(BPF_BTF_LOAD, &btf_load_attr, sizeof(btf_load_attr));
+}
+
+SEC("syscall")
+int bpf_prog(struct args *ctx)
+{
+ static char license[] = "GPL";
+ static struct bpf_insn insns[] = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ static union bpf_attr map_create_attr = {
+ .map_type = BPF_MAP_TYPE_HASH,
+ .key_size = 8,
+ .value_size = 8,
+ .btf_key_type_id = 1,
+ .btf_value_type_id = 2,
+ };
+ static union bpf_attr map_update_attr = { .map_fd = 1, };
+ static __u64 key = 12;
+ static __u64 value = 34;
+ static union bpf_attr prog_load_attr = {
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .insn_cnt = sizeof(insns) / sizeof(insns[0]),
+ };
+ int ret;
+
+ ret = btf_load();
+ if (ret <= 0)
+ return ret;
+
+ ctx->btf_fd = ret;
+ map_create_attr.max_entries = ctx->max_entries;
+ map_create_attr.btf_fd = ret;
+
+ prog_load_attr.license = (long) license;
+ prog_load_attr.insns = (long) insns;
+ prog_load_attr.log_buf = ctx->log_buf;
+ prog_load_attr.log_size = ctx->log_size;
+ prog_load_attr.log_level = 1;
+
+ ret = bpf_sys_bpf(BPF_MAP_CREATE, &map_create_attr, sizeof(map_create_attr));
+ if (ret <= 0)
+ return ret;
+ ctx->map_fd = ret;
+ insns[3].imm = ret;
+
+ map_update_attr.map_fd = ret;
+ map_update_attr.key = (long) &key;
+ map_update_attr.value = (long) &value;
+ ret = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, &map_update_attr, sizeof(map_update_attr));
+ if (ret < 0)
+ return ret;
+
+ ret = bpf_sys_bpf(BPF_PROG_LOAD, &prog_load_attr, sizeof(prog_load_attr));
+ if (ret <= 0)
+ return ret;
+ ctx->prog_fd = ret;
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c
index 739dc2a51e74..910858fe078a 100644
--- a/tools/testing/selftests/bpf/progs/tailcall3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall3.c
@@ -10,7 +10,7 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int count;
+int count = 0;
SEC("classifier/0")
int bpf_func_0(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/tailcall4.c b/tools/testing/selftests/bpf/progs/tailcall4.c
index f82075b47d7d..bd4be135c39d 100644
--- a/tools/testing/selftests/bpf/progs/tailcall4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall4.c
@@ -10,7 +10,7 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int selector;
+int selector = 0;
#define TAIL_FUNC(x) \
SEC("classifier/" #x) \
diff --git a/tools/testing/selftests/bpf/progs/tailcall5.c b/tools/testing/selftests/bpf/progs/tailcall5.c
index ce5450744fd4..adf30a33064e 100644
--- a/tools/testing/selftests/bpf/progs/tailcall5.c
+++ b/tools/testing/selftests/bpf/progs/tailcall5.c
@@ -10,7 +10,7 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int selector;
+int selector = 0;
#define TAIL_FUNC(x) \
SEC("classifier/" #x) \
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
index 7b1c04183824..3cc4c12817b5 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
@@ -20,7 +20,7 @@ int subprog_tail(struct __sk_buff *skb)
return 1;
}
-static volatile int count;
+int count = 0;
SEC("classifier/0")
int bpf_func_0(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
index 9a1b166b7fbe..e89368a50b97 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
@@ -3,17 +3,35 @@
#include <bpf/bpf_helpers.h>
struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} nop_table SEC(".maps");
+
+struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
__uint(max_entries, 3);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int count;
+int count = 0;
+int noise = 0;
+
+__always_inline int subprog_noise(void)
+{
+ __u32 key = 0;
+
+ bpf_map_lookup_elem(&nop_table, &key);
+ return 0;
+}
__noinline
int subprog_tail_2(struct __sk_buff *skb)
{
+ if (noise)
+ subprog_noise();
bpf_tail_call_static(skb, &jmp_table, 2);
return skb->len * 3;
}
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
new file mode 100644
index 000000000000..2d3a7710e2ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int my_tid;
+
+int kprobe_res;
+int kprobe_multi_res;
+int kretprobe_res;
+int uprobe_res;
+int uretprobe_res;
+int tp_res;
+int pe_res;
+
+static void update(void *ctx, int *res)
+{
+ if (my_tid != (u32)bpf_get_current_pid_tgid())
+ return;
+
+ *res |= bpf_get_attach_cookie(ctx);
+}
+
+SEC("kprobe/sys_nanosleep")
+int handle_kprobe(struct pt_regs *ctx)
+{
+ update(ctx, &kprobe_res);
+ return 0;
+}
+
+SEC("kretprobe/sys_nanosleep")
+int handle_kretprobe(struct pt_regs *ctx)
+{
+ update(ctx, &kretprobe_res);
+ return 0;
+}
+
+SEC("uprobe/trigger_func")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ update(ctx, &uprobe_res);
+ return 0;
+}
+
+SEC("uretprobe/trigger_func")
+int handle_uretprobe(struct pt_regs *ctx)
+{
+ update(ctx, &uretprobe_res);
+ return 0;
+}
+
+/* bpf_prog_array, used by kernel internally to keep track of attached BPF
+ * programs to a given BPF hook (e.g., for tracepoints) doesn't allow the same
+ * BPF program to be attached multiple times. So have three identical copies
+ * ready to attach to the same tracepoint.
+ */
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp1(struct pt_regs *ctx)
+{
+ update(ctx, &tp_res);
+ return 0;
+}
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp2(struct pt_regs *ctx)
+{
+ update(ctx, &tp_res);
+ return 0;
+}
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp3(void *ctx)
+{
+ update(ctx, &tp_res);
+ return 1;
+}
+
+SEC("perf_event")
+int handle_pe(struct pt_regs *ctx)
+{
+ update(ctx, &pe_res);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c
index c4a9bae96e75..71184af57749 100644
--- a/tools/testing/selftests/bpf/progs/test_check_mtu.c
+++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c
@@ -11,8 +11,8 @@
char _license[] SEC("license") = "GPL";
/* Userspace will update with MTU it can see on device */
-static volatile const int GLOBAL_USER_MTU;
-static volatile const __u32 GLOBAL_USER_IFINDEX;
+volatile const int GLOBAL_USER_MTU;
+volatile const __u32 GLOBAL_USER_IFINDEX;
/* BPF-prog will update these with MTU values it can see */
__u32 global_bpf_mtu_xdp = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index 3c1e042962e6..e2a5acc4785c 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -39,8 +39,8 @@ char _license[] SEC("license") = "Dual BSD/GPL";
/**
* Destination port and IP used for UDP encapsulation.
*/
-static volatile const __be16 ENCAPSULATION_PORT;
-static volatile const __be32 ENCAPSULATION_IP;
+volatile const __be16 ENCAPSULATION_PORT;
+volatile const __be32 ENCAPSULATION_IP;
typedef struct {
uint64_t processed_packets_total;
diff --git a/tools/testing/selftests/bpf/progs/test_core_autosize.c b/tools/testing/selftests/bpf/progs/test_core_autosize.c
index 44f5aa2e8956..9a7829c5e4a7 100644
--- a/tools/testing/selftests/bpf/progs/test_core_autosize.c
+++ b/tools/testing/selftests/bpf/progs/test_core_autosize.c
@@ -125,6 +125,16 @@ int handle_downsize(void *ctx)
return 0;
}
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define bpf_core_read_int bpf_core_read
+#else
+#define bpf_core_read_int(dst, sz, src) ({ \
+ /* Prevent "subtraction from stack pointer prohibited" */ \
+ volatile long __off = sizeof(*dst) - (sz); \
+ bpf_core_read((char *)(dst) + __off, sz, src); \
+})
+#endif
+
SEC("raw_tp/sys_enter")
int handle_probed(void *ctx)
{
@@ -132,23 +142,23 @@ int handle_probed(void *ctx)
__u64 tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
ptr_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val1), &in->val1);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val1), &in->val1);
val1_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val2), &in->val2);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val2), &in->val2);
val2_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val3), &in->val3);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val3), &in->val3);
val3_probed = tmp;
tmp = 0;
- bpf_core_read(&tmp, bpf_core_field_size(in->val4), &in->val4);
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val4), &in->val4);
val4_probed = tmp;
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func_args.c b/tools/testing/selftests/bpf/progs/test_global_func_args.c
index cae309538a9e..e712bf77daae 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func_args.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func_args.c
@@ -8,7 +8,7 @@ struct S {
int v;
};
-static volatile struct S global_variable;
+struct S global_variable = {};
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
new file mode 100644
index 000000000000..5f8379aadb29
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test weak ksyms.
+ *
+ * Copyright (c) 2021 Google
+ */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+int out__existing_typed = -1;
+__u64 out__existing_typeless = -1;
+
+__u64 out__non_existent_typeless = -1;
+__u64 out__non_existent_typed = -1;
+
+/* existing weak symbols */
+
+/* test existing weak symbols can be resolved. */
+extern const struct rq runqueues __ksym __weak; /* typed */
+extern const void bpf_prog_active __ksym __weak; /* typeless */
+
+
+/* non-existent weak symbols. */
+
+/* typeless symbols, default to zero. */
+extern const void bpf_link_fops1 __ksym __weak;
+
+/* typed symbols, default to zero. */
+extern const int bpf_link_fops2 __ksym __weak;
+
+SEC("raw_tp/sys_enter")
+int pass_handler(const void *ctx)
+{
+ struct rq *rq;
+
+ /* tests existing symbols. */
+ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
+ if (rq)
+ out__existing_typed = rq->cpu;
+ out__existing_typeless = (__u64)&bpf_prog_active;
+
+ /* tests non-existent symbols. */
+ out__non_existent_typeless = (__u64)&bpf_link_fops1;
+
+ /* tests non-existent symbols. */
+ out__non_existent_typed = (__u64)&bpf_link_fops2;
+
+ if (&bpf_link_fops2) /* can't happen */
+ out__non_existent_typed = (__u64)bpf_per_cpu_ptr(&bpf_link_fops2, 0);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
new file mode 100644
index 000000000000..3a193f42c7e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u32 set_pid = 0;
+__u64 set_key = 0;
+__u64 set_value = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 2);
+ __type(key, __u64);
+ __type(value, __u64);
+} hash_map SEC(".maps");
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int bpf_lookup_and_delete_test(const void *ctx)
+{
+ if (set_pid == bpf_get_current_pid_tgid() >> 32)
+ bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
new file mode 100644
index 000000000000..703c08e06442
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Isovalent, Inc. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, int);
+ __uint(max_entries, 4);
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 0); /* This will make map creation to fail */
+ __uint(key_size, sizeof(__u32));
+ __array(values, struct inner);
+} mim SEC(".maps");
+
+SEC("xdp")
+int xdp_noop0(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c
new file mode 100644
index 000000000000..27df571abf5b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ * 1. If reuse_md->migrating_sk is NULL (SYN packet),
+ * return SK_PASS without selecting a listener.
+ * 2. If reuse_md->migrating_sk is not NULL (socket migration),
+ * select a listener (reuseport_map[migrate_map[cookie]])
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/in.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, 256);
+ __type(key, int);
+ __type(value, __u64);
+} reuseport_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 256);
+ __type(key, __u64);
+ __type(value, int);
+} migrate_map SEC(".maps");
+
+int migrated_at_close = 0;
+int migrated_at_close_fastopen = 0;
+int migrated_at_send_synack = 0;
+int migrated_at_recv_ack = 0;
+__be16 server_port;
+
+SEC("xdp")
+int drop_ack(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ struct tcphdr *tcp = NULL;
+
+ if (eth + 1 > data_end)
+ goto pass;
+
+ switch (bpf_ntohs(eth->h_proto)) {
+ case ETH_P_IP: {
+ struct iphdr *ip = (struct iphdr *)(eth + 1);
+
+ if (ip + 1 > data_end)
+ goto pass;
+
+ if (ip->protocol != IPPROTO_TCP)
+ goto pass;
+
+ tcp = (struct tcphdr *)((void *)ip + ip->ihl * 4);
+ break;
+ }
+ case ETH_P_IPV6: {
+ struct ipv6hdr *ipv6 = (struct ipv6hdr *)(eth + 1);
+
+ if (ipv6 + 1 > data_end)
+ goto pass;
+
+ if (ipv6->nexthdr != IPPROTO_TCP)
+ goto pass;
+
+ tcp = (struct tcphdr *)(ipv6 + 1);
+ break;
+ }
+ default:
+ goto pass;
+ }
+
+ if (tcp + 1 > data_end)
+ goto pass;
+
+ if (tcp->dest != server_port)
+ goto pass;
+
+ if (!tcp->syn && tcp->ack)
+ return XDP_DROP;
+
+pass:
+ return XDP_PASS;
+}
+
+SEC("sk_reuseport/migrate")
+int migrate_reuseport(struct sk_reuseport_md *reuse_md)
+{
+ int *key, flags = 0, state, err;
+ __u64 cookie;
+
+ if (!reuse_md->migrating_sk)
+ return SK_PASS;
+
+ state = reuse_md->migrating_sk->state;
+ cookie = bpf_get_socket_cookie(reuse_md->sk);
+
+ key = bpf_map_lookup_elem(&migrate_map, &cookie);
+ if (!key)
+ return SK_DROP;
+
+ err = bpf_sk_select_reuseport(reuse_md, &reuseport_map, key, flags);
+ if (err)
+ return SK_PASS;
+
+ switch (state) {
+ case BPF_TCP_ESTABLISHED:
+ __sync_fetch_and_add(&migrated_at_close, 1);
+ break;
+ case BPF_TCP_SYN_RECV:
+ __sync_fetch_and_add(&migrated_at_close_fastopen, 1);
+ break;
+ case BPF_TCP_NEW_SYN_RECV:
+ if (!reuse_md->len)
+ __sync_fetch_and_add(&migrated_at_send_synack, 1);
+ else
+ __sync_fetch_and_add(&migrated_at_recv_ack, 1);
+ break;
+ }
+
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_link.c b/tools/testing/selftests/bpf/progs/test_perf_link.c
new file mode 100644
index 000000000000..c1db9fd98d0c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_perf_link.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int run_cnt = 0;
+
+SEC("perf_event")
+int handler(struct pt_regs *ctx)
+{
+ __sync_fetch_and_add(&run_cnt, 1);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
index ecbeea2df259..fc8e8a34a3db 100644
--- a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
+++ b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
@@ -5,7 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-static volatile const struct {
+const struct {
unsigned a[4];
/*
* if the struct's size is multiple of 16, compiler will put it into
@@ -15,11 +15,11 @@ static volatile const struct {
char _y;
} rdonly_values = { .a = {2, 3, 4, 5} };
-static volatile struct {
+struct {
unsigned did_run;
unsigned iters;
unsigned sum;
-} res;
+} res = {};
SEC("raw_tracepoint/sys_enter:skip_loop")
int skip_loop(struct pt_regs *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
index 6b3f288b7c63..eaa7d9dba0be 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -35,7 +35,7 @@ long prod_pos = 0;
/* inner state */
long seq = 0;
-SEC("tp/syscalls/sys_enter_getpgid")
+SEC("fentry/__x64_sys_getpgid")
int test_ringbuf(void *ctx)
{
int cur_pid = bpf_get_current_pid_tgid() >> 32;
@@ -48,7 +48,7 @@ int test_ringbuf(void *ctx)
sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
if (!sample) {
__sync_fetch_and_add(&dropped, 1);
- return 1;
+ return 0;
}
sample->pid = pid;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index e83d0b48d80c..8249075f088f 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -91,7 +91,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_use_after_free")
+SEC("classifier/err_use_after_free")
int bpf_sk_lookup_uaf(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -106,7 +106,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
return family;
}
-SEC("classifier/fail_modify_sk_pointer")
+SEC("classifier/err_modify_sk_pointer")
int bpf_sk_lookup_modptr(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -121,7 +121,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_modify_sk_or_null_pointer")
+SEC("classifier/err_modify_sk_or_null_pointer")
int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -135,7 +135,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_no_release")
+SEC("classifier/err_no_release")
int bpf_sk_lookup_test2(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -144,7 +144,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_release_twice")
+SEC("classifier/err_release_twice")
int bpf_sk_lookup_test3(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -156,7 +156,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_release_unchecked")
+SEC("classifier/err_release_unchecked")
int bpf_sk_lookup_test4(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -173,7 +173,7 @@ void lookup_no_release(struct __sk_buff *skb)
bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
}
-SEC("classifier/fail_no_release_subcall")
+SEC("classifier/err_no_release_subcall")
int bpf_sk_lookup_test5(struct __sk_buff *skb)
{
lookup_no_release(skb);
diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c
index 374ccef704e1..441fa1c552c8 100644
--- a/tools/testing/selftests/bpf/progs/test_skeleton.c
+++ b/tools/testing/selftests/bpf/progs/test_skeleton.c
@@ -38,11 +38,11 @@ extern int LINUX_KERNEL_VERSION __kconfig;
bool bpf_syscall = 0;
int kern_ver = 0;
+struct s out5 = {};
+
SEC("raw_tp/sys_enter")
int handler(const void *ctx)
{
- static volatile struct s out5;
-
out1 = in1;
out2 = in2;
out3 = in3;
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c
index e35129bea0a0..8fda07544023 100644
--- a/tools/testing/selftests/bpf/progs/test_snprintf.c
+++ b/tools/testing/selftests/bpf/progs/test_snprintf.c
@@ -3,7 +3,6 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
__u32 pid = 0;
@@ -60,9 +59,9 @@ int handler(const void *ctx)
/* Kernel pointers */
addr_ret = BPF_SNPRINTF(addr_out, sizeof(addr_out), "%pK %px %p",
0, 0xFFFF00000ADD4E55, 0xFFFF00000ADD4E55);
- /* Strings embedding */
- str_ret = BPF_SNPRINTF(str_out, sizeof(str_out), "%s %+05s",
- str1, longstr);
+ /* Strings and single-byte character embedding */
+ str_ret = BPF_SNPRINTF(str_out, sizeof(str_out), "%s % 9c %+2c %-3c %04c %0c %+05s",
+ str1, 'a', 'b', 'c', 'd', 'e', longstr);
/* Overflow */
over_ret = BPF_SNPRINTF(over_out, sizeof(over_out), "%%overflow");
/* Padding of fixed width numbers */
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf_single.c b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
index 402adaf344f9..3095837334d3 100644
--- a/tools/testing/selftests/bpf/progs/test_snprintf_single.c
+++ b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
@@ -5,7 +5,7 @@
#include <bpf/bpf_helpers.h>
/* The format string is filled from the userspace such that loading fails */
-static const char fmt[10];
+const char fmt[10];
SEC("raw_tp/sys_enter")
int handler(const void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index a39eba9f5201..a1cc58b10c7c 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -28,8 +28,8 @@ struct {
__type(value, unsigned int);
} verdict_map SEC(".maps");
-static volatile bool test_sockmap; /* toggled by user-space */
-static volatile bool test_ingress; /* toggled by user-space */
+bool test_sockmap = false; /* toggled by user-space */
+bool test_ingress = false; /* toggled by user-space */
SEC("sk_skb/stream_parser")
int prog_stream_parser(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked1.c b/tools/testing/selftests/bpf/progs/test_static_linked1.c
index ea1a6c4c7172..4f0b612e1661 100644
--- a/tools/testing/selftests/bpf/progs/test_static_linked1.c
+++ b/tools/testing/selftests/bpf/progs/test_static_linked1.c
@@ -4,10 +4,10 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-/* 8-byte aligned .bss */
-static volatile long static_var1;
-static volatile int static_var11;
-int var1 = 0;
+/* 8-byte aligned .data */
+static volatile long static_var1 = 2;
+static volatile int static_var2 = 3;
+int var1 = -1;
/* 4-byte aligned .rodata */
const volatile int rovar1;
@@ -21,7 +21,7 @@ static __noinline int subprog(int x)
SEC("raw_tp/sys_enter")
int handler1(const void *ctx)
{
- var1 = subprog(rovar1) + static_var1 + static_var11;
+ var1 = subprog(rovar1) + static_var1 + static_var2;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked2.c b/tools/testing/selftests/bpf/progs/test_static_linked2.c
index 54d8d1ab577c..766ebd502a60 100644
--- a/tools/testing/selftests/bpf/progs/test_static_linked2.c
+++ b/tools/testing/selftests/bpf/progs/test_static_linked2.c
@@ -4,10 +4,10 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-/* 4-byte aligned .bss */
-static volatile int static_var2;
-static volatile int static_var22;
-int var2 = 0;
+/* 4-byte aligned .data */
+static volatile int static_var1 = 5;
+static volatile int static_var2 = 6;
+int var2 = -1;
/* 8-byte aligned .rodata */
const volatile long rovar2;
@@ -21,7 +21,7 @@ static __noinline int subprog(int x)
SEC("raw_tp/sys_enter")
int handler2(const void *ctx)
{
- var2 = subprog(rovar2) + static_var2 + static_var22;
+ var2 = subprog(rovar2) + static_var1 + static_var2;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c
index d3c5673c0218..b7c37ca09544 100644
--- a/tools/testing/selftests/bpf/progs/test_subprogs.c
+++ b/tools/testing/selftests/bpf/progs/test_subprogs.c
@@ -4,8 +4,18 @@
const char LICENSE[] SEC("license") = "GPL";
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array SEC(".maps");
+
__noinline int sub1(int x)
{
+ int key = 0;
+
+ bpf_map_lookup_elem(&array, &key);
return x + 1;
}
@@ -23,6 +33,9 @@ static __noinline int sub3(int z)
static __noinline int sub4(int w)
{
+ int key = 0;
+
+ bpf_map_lookup_elem(&array, &key);
return w + sub3(5) + sub1(6);
}
diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
new file mode 100644
index 000000000000..e6cb09259408
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define PT_REGS_SIZE sizeof(struct pt_regs)
+
+/*
+ * The kernel struct pt_regs isn't exported in its entirety to userspace.
+ * Pass it as an array to task_pt_regs.c
+ */
+char current_regs[PT_REGS_SIZE] = {};
+char ctx_regs[PT_REGS_SIZE] = {};
+int uprobe_res = 0;
+
+SEC("uprobe/trigger_func")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ struct task_struct *current;
+ struct pt_regs *regs;
+
+ current = bpf_get_current_task_btf();
+ regs = (struct pt_regs *) bpf_task_pt_regs(current);
+ if (bpf_probe_read_kernel(current_regs, PT_REGS_SIZE, regs))
+ return 0;
+ if (bpf_probe_read_kernel(ctx_regs, PT_REGS_SIZE, ctx))
+ return 0;
+
+ /* Prove that uprobe was run */
+ uprobe_res = 1;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_bpf.c b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
new file mode 100644
index 000000000000..18a3a7ed924a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* Dummy prog to test TC-BPF API */
+
+SEC("classifier")
+int cls(struct __sk_buff *skb)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 84cd63259554..a0e7762b1e5a 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -528,7 +528,6 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb)
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
- char buf[sizeof(struct v6hdr)];
struct gre_hdr greh;
struct udphdr udph;
int olen = len;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c
new file mode 100644
index 000000000000..d7b88cd05afd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp")
+int xdp_context(struct xdp_md *xdp)
+{
+ void *data = (void *)(long)xdp->data;
+ __u32 *metadata = (void *)(long)xdp->data_meta;
+ __u32 ret;
+
+ if (metadata + 1 > data)
+ return XDP_ABORTED;
+ ret = *metadata;
+ if (bpf_xdp_adjust_meta(xdp, 4))
+ return XDP_ABORTED;
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
new file mode 100644
index 000000000000..5f5309791649
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+ int counter;
+ struct bpf_timer timer;
+ struct bpf_spin_lock lock; /* unused */
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap_malloc SEC(".maps");
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 4);
+ __type(key, int);
+ __type(value, struct elem);
+} lru SEC(".maps");
+
+__u64 bss_data;
+__u64 err;
+__u64 ok;
+__u64 callback_check = 52;
+__u64 callback2_check = 52;
+
+#define ARRAY 1
+#define HTAB 2
+#define HTAB_MALLOC 3
+#define LRU 4
+
+/* callback for array and lru timers */
+static int timer_cb1(void *map, int *key, struct bpf_timer *timer)
+{
+ /* increment bss variable twice.
+ * Once via array timer callback and once via lru timer callback
+ */
+ bss_data += 5;
+
+ /* *key == 0 - the callback was called for array timer.
+ * *key == 4 - the callback was called from lru timer.
+ */
+ if (*key == ARRAY) {
+ struct bpf_timer *lru_timer;
+ int lru_key = LRU;
+
+ /* rearm array timer to be called again in ~35 seconds */
+ if (bpf_timer_start(timer, 1ull << 35, 0) != 0)
+ err |= 1;
+
+ lru_timer = bpf_map_lookup_elem(&lru, &lru_key);
+ if (!lru_timer)
+ return 0;
+ bpf_timer_set_callback(lru_timer, timer_cb1);
+ if (bpf_timer_start(lru_timer, 0, 0) != 0)
+ err |= 2;
+ } else if (*key == LRU) {
+ int lru_key, i;
+
+ for (i = LRU + 1;
+ i <= 100 /* for current LRU eviction algorithm this number
+ * should be larger than ~ lru->max_entries * 2
+ */;
+ i++) {
+ struct elem init = {};
+
+ /* lru_key cannot be used as loop induction variable
+ * otherwise the loop will be unbounded.
+ */
+ lru_key = i;
+
+ /* add more elements into lru map to push out current
+ * element and force deletion of this timer
+ */
+ bpf_map_update_elem(map, &lru_key, &init, 0);
+ /* look it up to bump it into active list */
+ bpf_map_lookup_elem(map, &lru_key);
+
+ /* keep adding until *key changes underneath,
+ * which means that key/timer memory was reused
+ */
+ if (*key != LRU)
+ break;
+ }
+
+ /* check that the timer was removed */
+ if (bpf_timer_cancel(timer) != -EINVAL)
+ err |= 4;
+ ok |= 1;
+ }
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ struct bpf_timer *arr_timer, *lru_timer;
+ struct elem init = {};
+ int lru_key = LRU;
+ int array_key = ARRAY;
+
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
+
+ bpf_map_update_elem(&lru, &lru_key, &init, 0);
+ lru_timer = bpf_map_lookup_elem(&lru, &lru_key);
+ if (!lru_timer)
+ return 0;
+ bpf_timer_init(lru_timer, &lru, CLOCK_MONOTONIC);
+
+ bpf_timer_set_callback(arr_timer, timer_cb1);
+ bpf_timer_start(arr_timer, 0 /* call timer_cb1 asap */, 0);
+
+ /* init more timers to check that array destruction
+ * doesn't leak timer memory.
+ */
+ array_key = 0;
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
+ return 0;
+}
+
+/* callback for prealloc and non-prealloca hashtab timers */
+static int timer_cb2(void *map, int *key, struct hmap_elem *val)
+{
+ if (*key == HTAB)
+ callback_check--;
+ else
+ callback2_check--;
+ if (val->counter > 0 && --val->counter) {
+ /* re-arm the timer again to execute after 1 usec */
+ bpf_timer_start(&val->timer, 1000, 0);
+ } else if (*key == HTAB) {
+ struct bpf_timer *arr_timer;
+ int array_key = ARRAY;
+
+ /* cancel arr_timer otherwise bpf_fentry_test1 prog
+ * will stay alive forever.
+ */
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ if (bpf_timer_cancel(arr_timer) != 1)
+ /* bpf_timer_cancel should return 1 to indicate
+ * that arr_timer was active at this time
+ */
+ err |= 8;
+
+ /* try to cancel ourself. It shouldn't deadlock. */
+ if (bpf_timer_cancel(&val->timer) != -EDEADLK)
+ err |= 16;
+
+ /* delete this key and this timer anyway.
+ * It shouldn't deadlock either.
+ */
+ bpf_map_delete_elem(map, key);
+
+ /* in preallocated hashmap both 'key' and 'val' could have been
+ * reused to store another map element (like in LRU above),
+ * but in controlled test environment the below test works.
+ * It's not a use-after-free. The memory is owned by the map.
+ */
+ if (bpf_timer_start(&val->timer, 1000, 0) != -EINVAL)
+ err |= 32;
+ ok |= 2;
+ } else {
+ if (*key != HTAB_MALLOC)
+ err |= 64;
+
+ /* try to cancel ourself. It shouldn't deadlock. */
+ if (bpf_timer_cancel(&val->timer) != -EDEADLK)
+ err |= 128;
+
+ /* delete this key and this timer anyway.
+ * It shouldn't deadlock either.
+ */
+ bpf_map_delete_elem(map, key);
+
+ /* in non-preallocated hashmap both 'key' and 'val' are RCU
+ * protected and still valid though this element was deleted
+ * from the map. Arm this timer for ~35 seconds. When callback
+ * finishes the call_rcu will invoke:
+ * htab_elem_free_rcu
+ * check_and_free_timer
+ * bpf_timer_cancel_and_free
+ * to cancel this 35 second sleep and delete the timer for real.
+ */
+ if (bpf_timer_start(&val->timer, 1ull << 35, 0) != 0)
+ err |= 256;
+ ok |= 4;
+ }
+ return 0;
+}
+
+int bpf_timer_test(void)
+{
+ struct hmap_elem *val;
+ int key = HTAB, key_malloc = HTAB_MALLOC;
+
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val) {
+ if (bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME) != 0)
+ err |= 512;
+ bpf_timer_set_callback(&val->timer, timer_cb2);
+ bpf_timer_start(&val->timer, 1000, 0);
+ }
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val) {
+ if (bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME) != 0)
+ err |= 1024;
+ bpf_timer_set_callback(&val->timer, timer_cb2);
+ bpf_timer_start(&val->timer, 1000, 0);
+ }
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG(test2, int a, int b)
+{
+ struct hmap_elem init = {}, *val;
+ int key = HTAB, key_malloc = HTAB_MALLOC;
+
+ init.counter = 10; /* number of times to trigger timer_cb2 */
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+ /* update the same key to free the timer */
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+ /* update the same key to free the timer */
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+
+ /* init more timers to check that htab operations
+ * don't leak timer memory.
+ */
+ key = 0;
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+ bpf_map_delete_elem(&hmap, &key);
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+
+ /* and with non-prealloc htab */
+ key_malloc = 0;
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+ bpf_map_delete_elem(&hmap_malloc, &key_malloc);
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+
+ return bpf_timer_test();
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_mim.c b/tools/testing/selftests/bpf/progs/timer_mim.c
new file mode 100644
index 000000000000..2fee7ab105ef
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_mim.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+ int pad; /* unused */
+ struct bpf_timer timer;
+};
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1024);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} inner_htab SEC(".maps");
+
+#define ARRAY_KEY 1
+#define HASH_KEY 1234
+
+struct outer_arr {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct inner_map);
+} outer_arr SEC(".maps") = {
+ .values = { [ARRAY_KEY] = &inner_htab },
+};
+
+__u64 err;
+__u64 ok;
+__u64 cnt;
+
+static int timer_cb1(void *map, int *key, struct hmap_elem *val);
+
+static int timer_cb2(void *map, int *key, struct hmap_elem *val)
+{
+ cnt++;
+ bpf_timer_set_callback(&val->timer, timer_cb1);
+ if (bpf_timer_start(&val->timer, 1000, 0))
+ err |= 1;
+ ok |= 1;
+ return 0;
+}
+
+/* callback for inner hash map */
+static int timer_cb1(void *map, int *key, struct hmap_elem *val)
+{
+ cnt++;
+ bpf_timer_set_callback(&val->timer, timer_cb2);
+ if (bpf_timer_start(&val->timer, 1000, 0))
+ err |= 2;
+ /* Do a lookup to make sure 'map' and 'key' pointers are correct */
+ bpf_map_lookup_elem(map, key);
+ ok |= 2;
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ struct hmap_elem init = {};
+ struct bpf_map *inner_map;
+ struct hmap_elem *val;
+ int array_key = ARRAY_KEY;
+ int hash_key = HASH_KEY;
+
+ inner_map = bpf_map_lookup_elem(&outer_arr, &array_key);
+ if (!inner_map)
+ return 0;
+
+ bpf_map_update_elem(inner_map, &hash_key, &init, 0);
+ val = bpf_map_lookup_elem(inner_map, &hash_key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->timer, inner_map, CLOCK_MONOTONIC);
+ if (bpf_timer_set_callback(&val->timer, timer_cb1))
+ err |= 4;
+ if (bpf_timer_start(&val->timer, 0, 0))
+ err |= 8;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_mim_reject.c b/tools/testing/selftests/bpf/progs/timer_mim_reject.c
new file mode 100644
index 000000000000..5d648e3d8a41
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_mim_reject.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+ int pad; /* unused */
+ struct bpf_timer timer;
+};
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1024);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} inner_htab SEC(".maps");
+
+#define ARRAY_KEY 1
+#define ARRAY_KEY2 2
+#define HASH_KEY 1234
+
+struct outer_arr {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct inner_map);
+} outer_arr SEC(".maps") = {
+ .values = { [ARRAY_KEY] = &inner_htab },
+};
+
+__u64 err;
+__u64 ok;
+__u64 cnt;
+
+/* callback for inner hash map */
+static int timer_cb(void *map, int *key, struct hmap_elem *val)
+{
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ struct hmap_elem init = {};
+ struct bpf_map *inner_map, *inner_map2;
+ struct hmap_elem *val;
+ int array_key = ARRAY_KEY;
+ int array_key2 = ARRAY_KEY2;
+ int hash_key = HASH_KEY;
+
+ inner_map = bpf_map_lookup_elem(&outer_arr, &array_key);
+ if (!inner_map)
+ return 0;
+
+ inner_map2 = bpf_map_lookup_elem(&outer_arr, &array_key2);
+ if (!inner_map2)
+ return 0;
+ bpf_map_update_elem(inner_map, &hash_key, &init, 0);
+ val = bpf_map_lookup_elem(inner_map, &hash_key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->timer, inner_map2, CLOCK_MONOTONIC);
+ if (bpf_timer_set_callback(&val->timer, timer_cb))
+ err |= 4;
+ if (bpf_timer_start(&val->timer, 0, 0))
+ err |= 8;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c
index 8ca7f399b670..119582aa105a 100644
--- a/tools/testing/selftests/bpf/progs/trace_printk.c
+++ b/tools/testing/selftests/bpf/progs/trace_printk.c
@@ -10,11 +10,11 @@ char _license[] SEC("license") = "GPL";
int trace_printk_ret = 0;
int trace_printk_ran = 0;
-SEC("tp/raw_syscalls/sys_enter")
+const char fmt[] = "Testing,testing %d\n";
+
+SEC("fentry/__x64_sys_nanosleep")
int sys_enter(void *ctx)
{
- static const char fmt[] = "testing,testing %d\n";
-
trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt),
++trace_printk_ran);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
new file mode 100644
index 000000000000..880debcbcd65
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KBUILD_MODNAME "foo"
+#include <string.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+/* One map use devmap, another one use devmap_hash for testing */
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, 1024);
+} map_all SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 128);
+} map_egress SEC(".maps");
+
+/* map to store egress interfaces mac addresses */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __be64);
+ __uint(max_entries, 128);
+} mac_map SEC(".maps");
+
+SEC("xdp_redirect_map_multi")
+int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ int if_index = ctx->ingress_ifindex;
+ struct ethhdr *eth = data;
+ __u16 h_proto;
+ __u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ h_proto = eth->h_proto;
+
+ /* Using IPv4 for (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) testing */
+ if (h_proto == bpf_htons(ETH_P_IP))
+ return bpf_redirect_map(&map_all, 0,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+ /* Using IPv6 for none flag testing */
+ else if (h_proto == bpf_htons(ETH_P_IPV6))
+ return bpf_redirect_map(&map_all, if_index, 0);
+ /* All others for BPF_F_BROADCAST testing */
+ else
+ return bpf_redirect_map(&map_all, 0, BPF_F_BROADCAST);
+}
+
+/* The following 2 progs are for 2nd devmap prog testing */
+SEC("xdp_redirect_map_ingress")
+int xdp_redirect_map_all_prog(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&map_egress, 0,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+SEC("xdp_devmap/map_prog")
+int xdp_devmap_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 key = ctx->egress_ifindex;
+ struct ethhdr *eth = data;
+ __u64 nh_off;
+ __be64 *mac;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ mac = bpf_map_lookup_elem(&mac_map, &key);
+ if (mac)
+ __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c
index 94e6c2b281cb..5f725c720e00 100644
--- a/tools/testing/selftests/bpf/progs/xdp_tx.c
+++ b/tools/testing/selftests/bpf/progs/xdp_tx.c
@@ -3,7 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("tx")
+SEC("xdp")
int xdp_tx(struct xdp_md *xdp)
{
return XDP_TX;
diff --git a/tools/testing/selftests/bpf/test_bpftool.sh b/tools/testing/selftests/bpf/test_bpftool.sh
index 66690778e36d..718f59692ccb 100755
--- a/tools/testing/selftests/bpf/test_bpftool.sh
+++ b/tools/testing/selftests/bpf/test_bpftool.sh
@@ -2,4 +2,10 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2020 SUSE LLC.
+# 'make -C tools/testing/selftests/bpf install' will install to SCRIPT_DIR
+SCRIPT_DIR=$(dirname $(realpath $0))
+
+# 'make -C tools/testing/selftests/bpf' will install to BPFTOOL_INSTALL_PATH
+BPFTOOL_INSTALL_PATH="$SCRIPT_DIR"/tools/sbin
+export PATH=$SCRIPT_DIR:$BPFTOOL_INSTALL_PATH:$PATH
python3 -m unittest -v test_bpftool.TestBpftool
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index ac349a5cea7e..b03a87571592 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -22,7 +22,7 @@ KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
cd $KDIR_ROOT_DIR
if [ ! -e tools/bpf/bpftool/Makefile ]; then
echo -e "skip: bpftool files not found!\n"
- exit 0
+ exit 4 # KSFT_SKIP=4
fi
ERROR=0
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
new file mode 100755
index 000000000000..be54b7335a76
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -0,0 +1,586 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+#
+# Copyright (C) 2021 Isovalent, Inc.
+
+import argparse
+import re
+import os, sys
+
+LINUX_ROOT = os.path.abspath(os.path.join(__file__,
+ os.pardir, os.pardir, os.pardir, os.pardir, os.pardir))
+BPFTOOL_DIR = os.path.join(LINUX_ROOT, 'tools/bpf/bpftool')
+retval = 0
+
+class BlockParser(object):
+ """
+ A parser for extracting set of values from blocks such as enums.
+ @reader: a pointer to the open file to parse
+ """
+ def __init__(self, reader):
+ self.reader = reader
+
+ def search_block(self, start_marker):
+ """
+ Search for a given structure in a file.
+ @start_marker: regex marking the beginning of a structure to parse
+ """
+ offset = self.reader.tell()
+ array_start = re.search(start_marker, self.reader.read())
+ if array_start is None:
+ raise Exception('Failed to find start of block')
+ self.reader.seek(offset + array_start.start())
+
+ def parse(self, pattern, end_marker):
+ """
+ Parse a block and return a set of values. Values to extract must be
+ on separate lines in the file.
+ @pattern: pattern used to identify the values to extract
+ @end_marker: regex marking the end of the block to parse
+ """
+ entries = set()
+ while True:
+ line = self.reader.readline()
+ if not line or re.match(end_marker, line):
+ break
+ capture = pattern.search(line)
+ if capture and pattern.groups >= 1:
+ entries.add(capture.group(1))
+ return entries
+
+class ArrayParser(BlockParser):
+ """
+ A parser for extracting dicionaries of values from some BPF-related arrays.
+ @reader: a pointer to the open file to parse
+ @array_name: name of the array to parse
+ """
+ end_marker = re.compile('^};')
+
+ def __init__(self, reader, array_name):
+ self.array_name = array_name
+ self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n')
+ super().__init__(reader)
+
+ def search_block(self):
+ """
+ Search for the given array in a file.
+ """
+ super().search_block(self.start_marker);
+
+ def parse(self):
+ """
+ Parse a block and return data as a dictionary. Items to extract must be
+ on separate lines in the file.
+ """
+ pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$')
+ entries = {}
+ while True:
+ line = self.reader.readline()
+ if line == '' or re.match(self.end_marker, line):
+ break
+ capture = pattern.search(line)
+ if capture:
+ entries[capture.group(1)] = capture.group(2)
+ return entries
+
+class InlineListParser(BlockParser):
+ """
+ A parser for extracting set of values from inline lists.
+ """
+ def parse(self, pattern, end_marker):
+ """
+ Parse a block and return a set of values. Multiple values to extract
+ can be on a same line in the file.
+ @pattern: pattern used to identify the values to extract
+ @end_marker: regex marking the end of the block to parse
+ """
+ entries = set()
+ while True:
+ line = self.reader.readline()
+ if not line:
+ break
+ entries.update(pattern.findall(line))
+ if re.search(end_marker, line):
+ break
+ return entries
+
+class FileExtractor(object):
+ """
+ A generic reader for extracting data from a given file. This class contains
+ several helper methods that wrap arround parser objects to extract values
+ from different structures.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def __init__(self):
+ self.reader = open(self.filename, 'r')
+
+ def close(self):
+ """
+ Close the file used by the parser.
+ """
+ self.reader.close()
+
+ def reset_read(self):
+ """
+ Reset the file position indicator for this parser. This is useful when
+ parsing several structures in the file without respecting the order in
+ which those structures appear in the file.
+ """
+ self.reader.seek(0)
+
+ def get_types_from_array(self, array_name):
+ """
+ Search for and parse an array associating names to BPF_* enum members,
+ for example:
+
+ const char * const prog_type_name[] = {
+ [BPF_PROG_TYPE_UNSPEC] = "unspec",
+ [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
+ [BPF_PROG_TYPE_KPROBE] = "kprobe",
+ };
+
+ Return a dictionary with the enum member names as keys and the
+ associated names as values, for example:
+
+ {'BPF_PROG_TYPE_UNSPEC': 'unspec',
+ 'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter',
+ 'BPF_PROG_TYPE_KPROBE': 'kprobe'}
+
+ @array_name: name of the array to parse
+ """
+ array_parser = ArrayParser(self.reader, array_name)
+ array_parser.search_block()
+ return array_parser.parse()
+
+ def get_enum(self, enum_name):
+ """
+ Search for and parse an enum containing BPF_* members, for example:
+
+ enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
+ };
+
+ Return a set containing all member names, for example:
+
+ {'BPF_PROG_TYPE_UNSPEC',
+ 'BPF_PROG_TYPE_SOCKET_FILTER',
+ 'BPF_PROG_TYPE_KPROBE'}
+
+ @enum_name: name of the enum to parse
+ """
+ start_marker = re.compile(f'enum {enum_name} {{\n')
+ pattern = re.compile('^\s*(BPF_\w+),?$')
+ end_marker = re.compile('^};')
+ parser = BlockParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+ def __get_description_list(self, start_marker, pattern, end_marker):
+ parser = InlineListParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+ def get_rst_list(self, block_name):
+ """
+ Search for and parse a list of type names from RST documentation, for
+ example:
+
+ | *TYPE* := {
+ | **socket** | **kprobe** |
+ | **kretprobe**
+ | }
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+ @block_name: name of the blog to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(f'\*{block_name}\* := {{')
+ pattern = re.compile('\*\*([\w/-]+)\*\*')
+ end_marker = re.compile('}\n')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def get_help_list(self, block_name):
+ """
+ Search for and parse a list of type names from a help message in
+ bpftool, for example:
+
+ " TYPE := { socket | kprobe |\\n"
+ " kretprobe }\\n"
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+ @block_name: name of the blog to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(f'"\s*{block_name} := {{')
+ pattern = re.compile('([\w/]+) [|}]')
+ end_marker = re.compile('}')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def get_help_list_macro(self, macro):
+ """
+ Search for and parse a list of values from a help message starting with
+ a macro in bpftool, for example:
+
+ " " HELP_SPEC_OPTIONS " |\\n"
+ " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} }\\n"
+
+ Return a set containing all item names, for example:
+
+ {'-f', '--bpffs', '-m', '--mapcompat', '-n', '--nomount'}
+
+ @macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example
+ """
+ start_marker = re.compile(f'"\s*{macro}\s*" [|}}]')
+ pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])')
+ end_marker = re.compile('}\\\\n')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def default_options(self):
+ """
+ Return the default options contained in HELP_SPEC_OPTIONS
+ """
+ return { '-j', '--json', '-p', '--pretty', '-d', '--debug' }
+
+ def get_bashcomp_list(self, block_name):
+ """
+ Search for and parse a list of type names from a variable in bash
+ completion file, for example:
+
+ local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \\
+ kretprobe'
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+ @block_name: name of the blog to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(f'local {block_name}=\'')
+ pattern = re.compile('(?:.*=\')?([\w/]+)')
+ end_marker = re.compile('\'$')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+class SourceFileExtractor(FileExtractor):
+ """
+ An abstract extractor for a source file with usage message.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def get_options(self):
+ return self.default_options().union(self.get_help_list_macro('HELP_SPEC_OPTIONS'))
+
+class ProgFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's prog.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'prog.c')
+
+ def get_prog_types(self):
+ return self.get_types_from_array('prog_type_name')
+
+ def get_attach_types(self):
+ return self.get_types_from_array('attach_type_strings')
+
+ def get_prog_attach_help(self):
+ return self.get_help_list('ATTACH_TYPE')
+
+class MapFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's map.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'map.c')
+
+ def get_map_types(self):
+ return self.get_types_from_array('map_type_name')
+
+ def get_map_help(self):
+ return self.get_help_list('TYPE')
+
+class CgroupFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's cgroup.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'cgroup.c')
+
+ def get_prog_attach_help(self):
+ return self.get_help_list('ATTACH_TYPE')
+
+class CommonFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's common.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'common.c')
+
+ def __init__(self):
+ super().__init__()
+ self.attach_types = {}
+
+ def get_attach_types(self):
+ if not self.attach_types:
+ self.attach_types = self.get_types_from_array('attach_type_name')
+ return self.attach_types
+
+ def get_cgroup_attach_types(self):
+ if not self.attach_types:
+ self.get_attach_types()
+ cgroup_types = {}
+ for (key, value) in self.attach_types.items():
+ if key.find('BPF_CGROUP') != -1:
+ cgroup_types[key] = value
+ return cgroup_types
+
+class GenericSourceExtractor(SourceFileExtractor):
+ """
+ An extractor for generic source code files.
+ """
+ filename = ""
+
+ def __init__(self, filename):
+ self.filename = os.path.join(BPFTOOL_DIR, filename)
+ super().__init__()
+
+class BpfHeaderExtractor(FileExtractor):
+ """
+ An extractor for the UAPI BPF header.
+ """
+ filename = os.path.join(LINUX_ROOT, 'tools/include/uapi/linux/bpf.h')
+
+ def get_prog_types(self):
+ return self.get_enum('bpf_prog_type')
+
+ def get_map_types(self):
+ return self.get_enum('bpf_map_type')
+
+ def get_attach_types(self):
+ return self.get_enum('bpf_attach_type')
+
+class ManPageExtractor(FileExtractor):
+ """
+ An abstract extractor for an RST documentation page.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def get_options(self):
+ return self.get_rst_list('OPTIONS')
+
+class ManProgExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-prog.rst.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-prog.rst')
+
+ def get_attach_types(self):
+ return self.get_rst_list('ATTACH_TYPE')
+
+class ManMapExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-map.rst.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-map.rst')
+
+ def get_map_types(self):
+ return self.get_rst_list('TYPE')
+
+class ManCgroupExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-cgroup.rst.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-cgroup.rst')
+
+ def get_attach_types(self):
+ return self.get_rst_list('ATTACH_TYPE')
+
+class ManGenericExtractor(ManPageExtractor):
+ """
+ An extractor for generic RST documentation pages.
+ """
+ filename = ""
+
+ def __init__(self, filename):
+ self.filename = os.path.join(BPFTOOL_DIR, filename)
+ super().__init__()
+
+class BashcompExtractor(FileExtractor):
+ """
+ An extractor for bpftool's bash completion file.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'bash-completion/bpftool')
+
+ def get_prog_attach_types(self):
+ return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES')
+
+ def get_map_types(self):
+ return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES')
+
+ def get_cgroup_attach_types(self):
+ return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES')
+
+def verify(first_set, second_set, message):
+ """
+ Print all values that differ between two sets.
+ @first_set: one set to compare
+ @second_set: another set to compare
+ @message: message to print for values belonging to only one of the sets
+ """
+ global retval
+ diff = first_set.symmetric_difference(second_set)
+ if diff:
+ print(message, diff)
+ retval = 1
+
+def main():
+ # No arguments supported at this time, but print usage for -h|--help
+ argParser = argparse.ArgumentParser(description="""
+ Verify that bpftool's code, help messages, documentation and bash
+ completion are all in sync on program types, map types, attach types, and
+ options. Also check that bpftool is in sync with the UAPI BPF header.
+ """)
+ args = argParser.parse_args()
+
+ # Map types (enum)
+
+ bpf_info = BpfHeaderExtractor()
+ ref = bpf_info.get_map_types()
+
+ map_info = MapFileExtractor()
+ source_map_items = map_info.get_map_types()
+ map_types_enum = set(source_map_items.keys())
+
+ verify(ref, map_types_enum,
+ f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):')
+
+ # Map types (names)
+
+ source_map_types = set(source_map_items.values())
+ source_map_types.discard('unspec')
+
+ help_map_types = map_info.get_map_help()
+ help_map_options = map_info.get_options()
+ map_info.close()
+
+ man_map_info = ManMapExtractor()
+ man_map_options = man_map_info.get_options()
+ man_map_types = man_map_info.get_map_types()
+ man_map_info.close()
+
+ bashcomp_info = BashcompExtractor()
+ bashcomp_map_types = bashcomp_info.get_map_types()
+
+ verify(source_map_types, help_map_types,
+ f'Comparing {MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):')
+ verify(source_map_types, man_map_types,
+ f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):')
+ verify(help_map_options, man_map_options,
+ f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):')
+ verify(source_map_types, bashcomp_map_types,
+ f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):')
+
+ # Program types (enum)
+
+ ref = bpf_info.get_prog_types()
+
+ prog_info = ProgFileExtractor()
+ prog_types = set(prog_info.get_prog_types().keys())
+
+ verify(ref, prog_types,
+ f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):')
+
+ # Attach types (enum)
+
+ ref = bpf_info.get_attach_types()
+ bpf_info.close()
+
+ common_info = CommonFileExtractor()
+ attach_types = common_info.get_attach_types()
+
+ verify(ref, attach_types,
+ f'Comparing BPF header (enum bpf_attach_type) and {CommonFileExtractor.filename} (attach_type_name):')
+
+ # Attach types (names)
+
+ source_prog_attach_types = set(prog_info.get_attach_types().values())
+
+ help_prog_attach_types = prog_info.get_prog_attach_help()
+ help_prog_options = prog_info.get_options()
+ prog_info.close()
+
+ man_prog_info = ManProgExtractor()
+ man_prog_options = man_prog_info.get_options()
+ man_prog_attach_types = man_prog_info.get_attach_types()
+ man_prog_info.close()
+
+ bashcomp_info.reset_read() # We stopped at map types, rewind
+ bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types()
+
+ verify(source_prog_attach_types, help_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ verify(source_prog_attach_types, man_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):')
+ verify(help_prog_options, man_prog_options,
+ f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):')
+ verify(source_prog_attach_types, bashcomp_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):')
+
+ # Cgroup attach types
+
+ source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values())
+ common_info.close()
+
+ cgroup_info = CgroupFileExtractor()
+ help_cgroup_attach_types = cgroup_info.get_prog_attach_help()
+ help_cgroup_options = cgroup_info.get_options()
+ cgroup_info.close()
+
+ man_cgroup_info = ManCgroupExtractor()
+ man_cgroup_options = man_cgroup_info.get_options()
+ man_cgroup_attach_types = man_cgroup_info.get_attach_types()
+ man_cgroup_info.close()
+
+ bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types()
+ bashcomp_info.close()
+
+ verify(source_cgroup_attach_types, help_cgroup_attach_types,
+ f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ verify(source_cgroup_attach_types, man_cgroup_attach_types,
+ f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):')
+ verify(help_cgroup_options, man_cgroup_options,
+ f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):')
+ verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types,
+ f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):')
+
+ # Options for remaining commands
+
+ for cmd in [ 'btf', 'feature', 'gen', 'iter', 'link', 'net', 'perf', 'struct_ops', ]:
+ source_info = GenericSourceExtractor(cmd + '.c')
+ help_cmd_options = source_info.get_options()
+ source_info.close()
+
+ man_cmd_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool-' + cmd + '.rst'))
+ man_cmd_options = man_cmd_info.get_options()
+ man_cmd_info.close()
+
+ verify(help_cmd_options, man_cmd_options,
+ f'Comparing {source_info.filename} (do_help() OPTIONS) and {man_cmd_info.filename} (OPTIONS):')
+
+ source_main_info = GenericSourceExtractor('main.c')
+ help_main_options = source_main_info.get_options()
+ source_main_info.close()
+
+ man_main_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool.rst'))
+ man_main_options = man_main_info.get_options()
+ man_main_info.close()
+
+ verify(help_main_options, man_main_options,
+ f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):')
+
+ sys.exit(retval)
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh
index 7eb940a7b2eb..679cf968c7d1 100755
--- a/tools/testing/selftests/bpf/test_doc_build.sh
+++ b/tools/testing/selftests/bpf/test_doc_build.sh
@@ -1,13 +1,20 @@
#!/bin/bash
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+set -e
# Assume script is located under tools/testing/selftests/bpf/. We want to start
# build attempts from the top of kernel repository.
-SCRIPT_REL_PATH=$(realpath --relative-to=$PWD $0)
+SCRIPT_REL_PATH=$(realpath $0)
SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH)
-KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
+KDIR_ROOT_DIR=$(realpath $SCRIPT_REL_DIR/../../../../)
+SCRIPT_REL_DIR=$(dirname $(realpath --relative-to=$KDIR_ROOT_DIR $SCRIPT_REL_PATH))
cd $KDIR_ROOT_DIR
+if [ ! -e $PWD/$SCRIPT_REL_DIR/Makefile ]; then
+ echo -e "skip: bpftool files not found!\n"
+ exit 4 # KSFT_SKIP=4
+fi
+
for tgt in docs docs-clean; do
make -s -C $PWD/$SCRIPT_REL_DIR $tgt;
done
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 6a5349f9eb14..7e9049fa3edf 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -231,6 +231,14 @@ static void test_lru_sanity0(int map_type, int map_flags)
assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
errno == ENOENT);
+ /* lookup elem key=1 and delete it, then check it doesn't exist */
+ key = 1;
+ assert(!bpf_map_lookup_and_delete_elem(lru_map_fd, &key, &value));
+ assert(value[0] == 1234);
+
+ /* remove the same element from the expected map */
+ assert(!bpf_map_delete_elem(expected_map_fd, &key));
+
assert(map_equal(lru_map_fd, expected_map_fd));
close(expected_map_fd);
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
index 59ea56945e6c..b497bb85b667 100755
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
@@ -112,6 +112,14 @@ setup()
ip netns add "${NS2}"
ip netns add "${NS3}"
+ # rp_filter gets confused by what these tests are doing, so disable it
+ ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
+
ip link add veth1 type veth peer name veth2
ip link add veth3 type veth peer name veth4
ip link add veth5 type veth peer name veth6
@@ -236,11 +244,6 @@ setup()
ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF}
ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF}
- # rp_filter gets confused by what these tests are doing, so disable it
- ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
-
TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX)
sleep 1 # reduce flakiness
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 51adc42b2b40..c7a36a9378f8 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -53,23 +53,30 @@ static void test_hashmap(unsigned int task, void *data)
value = 0;
/* BPF_NOEXIST means add new element if it doesn't exist. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
/* key=1 already exists. */
errno == EEXIST);
/* -1 is an invalid flag. */
- assert(bpf_map_update_elem(fd, &key, &value, -1) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, -1) < 0 &&
errno == EINVAL);
/* Check that key=1 can be found. */
assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
key = 2;
+ value = 1234;
+ /* Insert key=2 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+ /* Check that key=2 matches the value and delete it */
+ assert(bpf_map_lookup_and_delete_elem(fd, &key, &value) == 0 && value == 1234);
+
/* Check that key=2 is not found. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
/* BPF_EXIST means update existing element. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
/* key=2 is not there. */
errno == ENOENT);
@@ -80,7 +87,7 @@ static void test_hashmap(unsigned int task, void *data)
* inserted due to max_entries limit.
*/
key = 0;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Update existing element, though the map is full. */
@@ -89,12 +96,12 @@ static void test_hashmap(unsigned int task, void *data)
key = 2;
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
key = 3;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Check that key = 0 doesn't exist. */
key = 0;
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -104,7 +111,7 @@ static void test_hashmap(unsigned int task, void *data)
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
(next_key == 1 || next_key == 2) &&
(next_key != first_key));
- assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
errno == ENOENT);
/* Delete both elements. */
@@ -112,13 +119,13 @@ static void test_hashmap(unsigned int task, void *data)
assert(bpf_map_delete_elem(fd, &key) == 0);
key = 2;
assert(bpf_map_delete_elem(fd, &key) == 0);
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
key = 0;
/* Check that map is empty. */
- assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
errno == ENOENT);
close(fd);
@@ -166,15 +173,25 @@ static void test_hashmap_percpu(unsigned int task, void *data)
/* Insert key=1 element. */
assert(!(expected_key_mask & key));
assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0);
+
+ /* Lookup and delete elem key=1 and check value. */
+ assert(bpf_map_lookup_and_delete_elem(fd, &key, value) == 0 &&
+ bpf_percpu(value,0) == 100);
+
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(value,i) = i + 100;
+
+ /* Insert key=1 element which should not exist. */
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0);
expected_key_mask |= key;
/* BPF_NOEXIST means add new element if it doesn't exist. */
- assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
/* key=1 already exists. */
errno == EEXIST);
/* -1 is an invalid flag. */
- assert(bpf_map_update_elem(fd, &key, value, -1) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, -1) < 0 &&
errno == EINVAL);
/* Check that key=1 can be found. Value could be 0 if the lookup
@@ -186,10 +203,10 @@ static void test_hashmap_percpu(unsigned int task, void *data)
key = 2;
/* Check that key=2 is not found. */
- assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, value) < 0 && errno == ENOENT);
/* BPF_EXIST means update existing element. */
- assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) < 0 &&
/* key=2 is not there. */
errno == ENOENT);
@@ -202,11 +219,11 @@ static void test_hashmap_percpu(unsigned int task, void *data)
* inserted due to max_entries limit.
*/
key = 0;
- assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Check that key = 0 doesn't exist. */
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -237,13 +254,13 @@ static void test_hashmap_percpu(unsigned int task, void *data)
assert(bpf_map_delete_elem(fd, &key) == 0);
key = 2;
assert(bpf_map_delete_elem(fd, &key) == 0);
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
key = 0;
/* Check that map is empty. */
- assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
errno == ENOENT);
close(fd);
@@ -360,7 +377,7 @@ static void test_arraymap(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
value = 0;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == EEXIST);
/* Check that key=1 can be found. */
@@ -374,11 +391,11 @@ static void test_arraymap(unsigned int task, void *data)
* due to max_entries limit.
*/
key = 2;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
errno == E2BIG);
/* Check that key = 2 doesn't exist. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -387,12 +404,12 @@ static void test_arraymap(unsigned int task, void *data)
next_key == 0);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
next_key == 1);
- assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
errno == ENOENT);
/* Delete shouldn't succeed. */
key = 1;
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
close(fd);
}
@@ -418,7 +435,7 @@ static void test_arraymap_percpu(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
bpf_percpu(values, 0) = 0;
- assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) < 0 &&
errno == EEXIST);
/* Check that key=1 can be found. */
@@ -433,11 +450,11 @@ static void test_arraymap_percpu(unsigned int task, void *data)
/* Check that key=2 cannot be inserted due to max_entries limit. */
key = 2;
- assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) < 0 &&
errno == E2BIG);
/* Check that key = 2 doesn't exist. */
- assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, values) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -446,12 +463,12 @@ static void test_arraymap_percpu(unsigned int task, void *data)
next_key == 0);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
next_key == 1);
- assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
errno == ENOENT);
/* Delete shouldn't succeed. */
key = 1;
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
close(fd);
}
@@ -555,7 +572,7 @@ static void test_queuemap(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
/* Check that element cannot be pushed due to max_entries limit */
- assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
errno == E2BIG);
/* Peek element */
@@ -571,12 +588,12 @@ static void test_queuemap(unsigned int task, void *data)
val == vals[i]);
/* Check that there are not elements left */
- assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
errno == ENOENT);
/* Check that non supported functions set errno to EINVAL */
- assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
- assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
close(fd);
}
@@ -613,7 +630,7 @@ static void test_stackmap(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
/* Check that element cannot be pushed due to max_entries limit */
- assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
errno == E2BIG);
/* Peek element */
@@ -629,12 +646,12 @@ static void test_stackmap(unsigned int task, void *data)
val == vals[i]);
/* Check that there are not elements left */
- assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
errno == ENOENT);
/* Check that non supported functions set errno to EINVAL */
- assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
- assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
close(fd);
}
@@ -747,8 +764,8 @@ static void test_sockmap(unsigned int tasks, void *data)
udp = socket(AF_INET, SOCK_DGRAM, 0);
i = 0;
err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
- if (!err) {
- printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
+ if (err) {
+ printf("Failed socket update SOCK_DGRAM '%i:%i'\n",
i, udp);
goto out_sockmap;
}
@@ -835,7 +852,7 @@ static void test_sockmap(unsigned int tasks, void *data)
}
bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
- if (IS_ERR(bpf_map_rx)) {
+ if (!bpf_map_rx) {
printf("Failed to load map rx from verdict prog\n");
goto out_sockmap;
}
@@ -847,7 +864,7 @@ static void test_sockmap(unsigned int tasks, void *data)
}
bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
- if (IS_ERR(bpf_map_tx)) {
+ if (!bpf_map_tx) {
printf("Failed to load map tx from verdict prog\n");
goto out_sockmap;
}
@@ -859,7 +876,7 @@ static void test_sockmap(unsigned int tasks, void *data)
}
bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
- if (IS_ERR(bpf_map_msg)) {
+ if (!bpf_map_msg) {
printf("Failed to load map msg from msg_verdict prog\n");
goto out_sockmap;
}
@@ -871,7 +888,7 @@ static void test_sockmap(unsigned int tasks, void *data)
}
bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
- if (IS_ERR(bpf_map_break)) {
+ if (!bpf_map_break) {
printf("Failed to load map tx from verdict prog\n");
goto out_sockmap;
}
@@ -968,7 +985,7 @@ static void test_sockmap(unsigned int tasks, void *data)
FD_ZERO(&w);
FD_SET(sfd[3], &w);
- to.tv_sec = 1;
+ to.tv_sec = 30;
to.tv_usec = 0;
s = select(sfd[3] + 1, &w, NULL, NULL, &to);
if (s == -1) {
@@ -1136,12 +1153,17 @@ out_sockmap:
}
#define MAPINMAP_PROG "./test_map_in_map.o"
+#define MAPINMAP_INVALID_PROG "./test_map_in_map_invalid.o"
static void test_map_in_map(void)
{
struct bpf_object *obj;
struct bpf_map *map;
int mim_fd, fd, err;
int pos = 0;
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ __u32 id = 0;
+ libbpf_print_fn_t old_print_fn;
obj = bpf_object__open(MAPINMAP_PROG);
@@ -1153,7 +1175,7 @@ static void test_map_in_map(void)
}
map = bpf_object__find_map_by_name(obj, "mim_array");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load array of maps from test prog\n");
goto out_map_in_map;
}
@@ -1164,7 +1186,7 @@ static void test_map_in_map(void)
}
map = bpf_object__find_map_by_name(obj, "mim_hash");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load hash of maps from test prog\n");
goto out_map_in_map;
}
@@ -1177,7 +1199,7 @@ static void test_map_in_map(void)
bpf_object__load(obj);
map = bpf_object__find_map_by_name(obj, "mim_array");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load array of maps from test prog\n");
goto out_map_in_map;
}
@@ -1194,7 +1216,7 @@ static void test_map_in_map(void)
}
map = bpf_object__find_map_by_name(obj, "mim_hash");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load hash of maps from test prog\n");
goto out_map_in_map;
}
@@ -1211,11 +1233,72 @@ static void test_map_in_map(void)
}
close(fd);
+ fd = -1;
bpf_object__close(obj);
+
+ /* Test that failing bpf_object__create_map() destroys the inner map */
+ obj = bpf_object__open(MAPINMAP_INVALID_PROG);
+ err = libbpf_get_error(obj);
+ if (err) {
+ printf("Failed to load %s program: %d %d",
+ MAPINMAP_INVALID_PROG, err, errno);
+ goto out_map_in_map;
+ }
+
+ map = bpf_object__find_map_by_name(obj, "mim");
+ if (!map) {
+ printf("Failed to load array of maps from test prog\n");
+ goto out_map_in_map;
+ }
+
+ old_print_fn = libbpf_set_print(NULL);
+
+ err = bpf_object__load(obj);
+ if (!err) {
+ printf("Loading obj supposed to fail\n");
+ goto out_map_in_map;
+ }
+
+ libbpf_set_print(old_print_fn);
+
+ /* Iterate over all maps to check whether the internal map
+ * ("mim.internal") has been destroyed.
+ */
+ while (true) {
+ err = bpf_map_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT)
+ break;
+ printf("Failed to get next map: %d", errno);
+ goto out_map_in_map;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ printf("Failed to get map by id %u: %d", id, errno);
+ goto out_map_in_map;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ printf("Failed to get map info by fd %d: %d", fd,
+ errno);
+ goto out_map_in_map;
+ }
+
+ if (!strcmp(info.name, "mim.inner")) {
+ printf("Inner map mim.inner was not destroyed\n");
+ goto out_map_in_map;
+ }
+ }
+
return;
out_map_in_map:
- close(fd);
+ if (fd >= 0)
+ close(fd);
exit(1);
}
@@ -1246,7 +1329,7 @@ static void test_map_large(void)
}
key.c = -1;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Iterate through all elements. */
@@ -1254,12 +1337,12 @@ static void test_map_large(void)
key.c = -1;
for (i = 0; i < MAP_SIZE; i++)
assert(bpf_map_get_next_key(fd, &key, &key) == 0);
- assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
key.c = 0;
assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
key.a = 1;
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
close(fd);
}
@@ -1313,15 +1396,22 @@ static void test_map_stress(void)
#define DO_DELETE 0
#define MAP_RETRIES 20
+#define MAX_DELAY_US 50000
+#define MIN_DELAY_RANGE_US 5000
static int map_update_retriable(int map_fd, const void *key, const void *value,
int flags, int attempts)
{
+ int delay = rand() % MIN_DELAY_RANGE_US;
+
while (bpf_map_update_elem(map_fd, key, value, flags)) {
if (!attempts || (errno != EAGAIN && errno != EBUSY))
return -errno;
- usleep(1);
+ if (delay <= MAX_DELAY_US / 2)
+ delay *= 2;
+
+ usleep(delay);
attempts--;
}
@@ -1330,11 +1420,16 @@ static int map_update_retriable(int map_fd, const void *key, const void *value,
static int map_delete_retriable(int map_fd, const void *key, int attempts)
{
+ int delay = rand() % MIN_DELAY_RANGE_US;
+
while (bpf_map_delete_elem(map_fd, key)) {
if (!attempts || (errno != EAGAIN && errno != EBUSY))
return -errno;
- usleep(1);
+ if (delay <= MAX_DELAY_US / 2)
+ delay *= 2;
+
+ usleep(delay);
attempts--;
}
@@ -1391,7 +1486,7 @@ static void test_map_parallel(void)
run_parallel(TASKS, test_update_delete, data);
/* Check that key=0 is already there. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == EEXIST);
/* Check that all elements were inserted. */
@@ -1399,7 +1494,7 @@ static void test_map_parallel(void)
key = -1;
for (i = 0; i < MAP_SIZE; i++)
assert(bpf_map_get_next_key(fd, &key, &key) == 0);
- assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
/* Another check for all elements */
for (i = 0; i < MAP_SIZE; i++) {
@@ -1415,8 +1510,8 @@ static void test_map_parallel(void)
/* Nothing should be left. */
key = -1;
- assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, NULL, &key) < 0 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
}
static void test_map_rdonly(void)
@@ -1434,12 +1529,12 @@ static void test_map_rdonly(void)
key = 1;
value = 1234;
/* Try to insert key=1 element. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) < 0 &&
errno == EPERM);
/* Check that key=1 is not found. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == ENOENT);
close(fd);
}
@@ -1462,8 +1557,8 @@ static void test_map_wronly_hash(void)
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
/* Check that reading elements and keys from the map is not allowed. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
- assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == EPERM);
+ assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == EPERM);
close(fd);
}
@@ -1490,10 +1585,10 @@ static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0);
/* Peek element should fail */
- assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM);
+ assert(bpf_map_lookup_elem(fd, NULL, &value) < 0 && errno == EPERM);
/* Pop element should fail */
- assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 &&
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) < 0 &&
errno == EPERM);
close(fd);
@@ -1547,7 +1642,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
value = &fd32;
}
err = bpf_map_update_elem(map_fd, &index0, value, BPF_ANY);
- CHECK(err != -1 || errno != EINVAL,
+ CHECK(err >= 0 || errno != EINVAL,
"reuseport array update unbound sk",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1576,7 +1671,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
*/
err = bpf_map_update_elem(map_fd, &index0, value,
BPF_ANY);
- CHECK(err != -1 || errno != EINVAL,
+ CHECK(err >= 0 || errno != EINVAL,
"reuseport array update non-listening sk",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1606,31 +1701,31 @@ static void test_reuseport_array(void)
map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
sizeof(__u32), sizeof(__u64), array_size, 0);
- CHECK(map_fd == -1, "reuseport array create",
+ CHECK(map_fd < 0, "reuseport array create",
"map_fd:%d, errno:%d\n", map_fd, errno);
/* Test lookup/update/delete with invalid index */
err = bpf_map_delete_elem(map_fd, &bad_index);
- CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries",
+ CHECK(err >= 0 || errno != E2BIG, "reuseport array del >=max_entries",
"err:%d errno:%d\n", err, errno);
err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY);
- CHECK(err != -1 || errno != E2BIG,
+ CHECK(err >= 0 || errno != E2BIG,
"reuseport array update >=max_entries",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array update >=max_entries",
"err:%d errno:%d\n", err, errno);
/* Test lookup/delete non existence elem */
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array lookup not-exist elem",
"err:%d errno:%d\n", err, errno);
err = bpf_map_delete_elem(map_fd, &index3);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array del not-exist elem",
"err:%d errno:%d\n", err, errno);
@@ -1644,7 +1739,7 @@ static void test_reuseport_array(void)
/* BPF_EXIST failure case */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_EXIST);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array update empty elem BPF_EXIST",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1653,7 +1748,7 @@ static void test_reuseport_array(void)
/* BPF_NOEXIST success case */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_NOEXIST);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array update empty elem BPF_NOEXIST",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1662,7 +1757,7 @@ static void test_reuseport_array(void)
/* BPF_EXIST success case. */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_EXIST);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array update same elem BPF_EXIST",
"sock_type:%d err:%d errno:%d\n", type, err, errno);
fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
@@ -1670,7 +1765,7 @@ static void test_reuseport_array(void)
/* BPF_NOEXIST failure case */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_NOEXIST);
- CHECK(err != -1 || errno != EEXIST,
+ CHECK(err >= 0 || errno != EEXIST,
"reuseport array update non-empty elem BPF_NOEXIST",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1679,7 +1774,7 @@ static void test_reuseport_array(void)
/* BPF_ANY case (always succeed) */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_ANY);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array update same sk with BPF_ANY",
"sock_type:%d err:%d errno:%d\n", type, err, errno);
@@ -1688,32 +1783,32 @@ static void test_reuseport_array(void)
/* The same sk cannot be added to reuseport_array twice */
err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY);
- CHECK(err != -1 || errno != EBUSY,
+ CHECK(err >= 0 || errno != EBUSY,
"reuseport array update same sk with same index",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY);
- CHECK(err != -1 || errno != EBUSY,
+ CHECK(err >= 0 || errno != EBUSY,
"reuseport array update same sk with different index",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
/* Test delete elem */
err = bpf_map_delete_elem(map_fd, &index3);
- CHECK(err == -1, "reuseport array delete sk",
+ CHECK(err < 0, "reuseport array delete sk",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
/* Add it back with BPF_NOEXIST */
err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array re-add with BPF_NOEXIST after del",
"sock_type:%d err:%d errno:%d\n", type, err, errno);
/* Test cookie */
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err == -1 || sk_cookie != map_cookie,
+ CHECK(err < 0 || sk_cookie != map_cookie,
"reuseport array lookup re-added sk",
"sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llxn",
type, err, errno, sk_cookie, map_cookie);
@@ -1722,7 +1817,7 @@ static void test_reuseport_array(void)
for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++)
close(grpa_fds64[f]);
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array lookup after close()",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1733,7 +1828,7 @@ static void test_reuseport_array(void)
CHECK(fd64 == -1, "socket(SOCK_RAW)", "err:%d errno:%d\n",
err, errno);
err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
- CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
+ CHECK(err >= 0 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
"err:%d errno:%d\n", err, errno);
close(fd64);
@@ -1743,16 +1838,16 @@ static void test_reuseport_array(void)
/* Test 32 bit fd */
map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
sizeof(__u32), sizeof(__u32), array_size, 0);
- CHECK(map_fd == -1, "reuseport array create",
+ CHECK(map_fd < 0, "reuseport array create",
"map_fd:%d, errno:%d\n", map_fd, errno);
prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64,
&sk_cookie, 1);
fd = fd64;
err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
- CHECK(err == -1, "reuseport array update 32 bit fd",
+ CHECK(err < 0, "reuseport array update 32 bit fd",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err != -1 || errno != ENOSPC,
+ CHECK(err >= 0 || errno != ENOSPC,
"reuseport array lookup 32 bit fd",
"err:%d errno:%d\n", err, errno);
close(fd);
@@ -1798,6 +1893,8 @@ int main(void)
{
srand(time(NULL));
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
map_flags = 0;
run_all_tests();
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
deleted file mode 100644
index a7b9a69f4fd5..000000000000
--- a/tools/testing/selftests/bpf/test_netcnt.c
+++ /dev/null
@@ -1,148 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/sysinfo.h>
-#include <sys/time.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-#include "netcnt_common.h"
-
-#define BPF_PROG "./netcnt_prog.o"
-#define TEST_CGROUP "/test-network-counters/"
-
-static int bpf_find_map(const char *test, struct bpf_object *obj,
- const char *name)
-{
- struct bpf_map *map;
-
- map = bpf_object__find_map_by_name(obj, name);
- if (!map) {
- printf("%s:FAIL:map '%s' not found\n", test, name);
- return -1;
- }
- return bpf_map__fd(map);
-}
-
-int main(int argc, char **argv)
-{
- struct percpu_net_cnt *percpu_netcnt;
- struct bpf_cgroup_storage_key key;
- int map_fd, percpu_map_fd;
- int error = EXIT_FAILURE;
- struct net_cnt netcnt;
- struct bpf_object *obj;
- int prog_fd, cgroup_fd;
- unsigned long packets;
- unsigned long bytes;
- int cpu, nproc;
- __u32 prog_cnt;
-
- nproc = get_nprocs_conf();
- percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
- if (!percpu_netcnt) {
- printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
- goto err;
- }
-
- if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB,
- &obj, &prog_fd)) {
- printf("Failed to load bpf program\n");
- goto out;
- }
-
- cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
- if (cgroup_fd < 0)
- goto err;
-
- /* Attach bpf program */
- if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
- printf("Failed to attach bpf program");
- goto err;
- }
-
- if (system("which ping6 &>/dev/null") == 0)
- assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null"));
- else
- assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null"));
-
- if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
- &prog_cnt)) {
- printf("Failed to query attached programs");
- goto err;
- }
-
- map_fd = bpf_find_map(__func__, obj, "netcnt");
- if (map_fd < 0) {
- printf("Failed to find bpf map with net counters");
- goto err;
- }
-
- percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt");
- if (percpu_map_fd < 0) {
- printf("Failed to find bpf map with percpu net counters");
- goto err;
- }
-
- if (bpf_map_get_next_key(map_fd, NULL, &key)) {
- printf("Failed to get key in cgroup storage\n");
- goto err;
- }
-
- if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) {
- printf("Failed to lookup cgroup storage\n");
- goto err;
- }
-
- if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) {
- printf("Failed to lookup percpu cgroup storage\n");
- goto err;
- }
-
- /* Some packets can be still in per-cpu cache, but not more than
- * MAX_PERCPU_PACKETS.
- */
- packets = netcnt.packets;
- bytes = netcnt.bytes;
- for (cpu = 0; cpu < nproc; cpu++) {
- if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) {
- printf("Unexpected percpu value: %llu\n",
- percpu_netcnt[cpu].packets);
- goto err;
- }
-
- packets += percpu_netcnt[cpu].packets;
- bytes += percpu_netcnt[cpu].bytes;
- }
-
- /* No packets should be lost */
- if (packets != 10000) {
- printf("Unexpected packet count: %lu\n", packets);
- goto err;
- }
-
- /* Let's check that bytes counter matches the number of packets
- * multiplied by the size of ipv6 ICMP packet.
- */
- if (bytes != packets * 104) {
- printf("Unexpected bytes count: %lu\n", bytes);
- goto err;
- }
-
- error = 0;
- printf("test_netcnt:PASS\n");
-
-err:
- cleanup_cgroup_environment();
- free(percpu_netcnt);
-
-out:
- return error;
-}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 6396932b97e2..cc1cd240445d 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -13,6 +13,28 @@
#include <execinfo.h> /* backtrace */
#include <linux/membarrier.h>
+/* Adapted from perf/util/string.c */
+static bool glob_match(const char *str, const char *pat)
+{
+ while (*str && *pat && *pat != '*') {
+ if (*str != *pat)
+ return false;
+ str++;
+ pat++;
+ }
+ /* Check wild card */
+ if (*pat == '*') {
+ while (*pat == '*')
+ pat++;
+ if (!*pat) /* Tail wild card matches all */
+ return true;
+ while (*str)
+ if (glob_match(str++, pat))
+ return true;
+ }
+ return !*str && !*pat;
+}
+
#define EXIT_NO_TEST 2
#define EXIT_ERR_SETUP_INFRA 3
@@ -55,12 +77,12 @@ static bool should_run(struct test_selector *sel, int num, const char *name)
int i;
for (i = 0; i < sel->blacklist.cnt; i++) {
- if (strstr(name, sel->blacklist.strs[i]))
+ if (glob_match(name, sel->blacklist.strs[i]))
return false;
}
for (i = 0; i < sel->whitelist.cnt; i++) {
- if (strstr(name, sel->whitelist.strs[i]))
+ if (glob_match(name, sel->whitelist.strs[i]))
return true;
}
@@ -148,18 +170,18 @@ void test__end_subtest()
struct prog_test_def *test = env.test;
int sub_error_cnt = test->error_cnt - test->old_error_cnt;
+ dump_test_log(test, sub_error_cnt);
+
+ fprintf(env.stdout, "#%d/%d %s/%s:%s\n",
+ test->test_num, test->subtest_num, test->test_name, test->subtest_name,
+ sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+
if (sub_error_cnt)
env.fail_cnt++;
else if (test->skip_cnt == 0)
env.sub_succ_cnt++;
skip_account();
- dump_test_log(test, sub_error_cnt);
-
- fprintf(env.stdout, "#%d/%d %s:%s\n",
- test->test_num, test->subtest_num, test->subtest_name,
- sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
-
free(test->subtest_name);
test->subtest_name = NULL;
}
@@ -450,6 +472,8 @@ enum ARG_KEYS {
ARG_VERBOSE = 'v',
ARG_GET_TEST_CNT = 'c',
ARG_LIST_TEST_NAMES = 'l',
+ ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
+ ARG_TEST_NAME_GLOB_DENYLIST = 'd',
};
static const struct argp_option opts[] = {
@@ -467,6 +491,10 @@ static const struct argp_option opts[] = {
"Get number of selected top-level tests " },
{ "list", ARG_LIST_TEST_NAMES, NULL, 0,
"List test names that would run (without running them) " },
+ { "allow", ARG_TEST_NAME_GLOB_ALLOWLIST, "NAMES", 0,
+ "Run tests with name matching the pattern (supports '*' wildcard)." },
+ { "deny", ARG_TEST_NAME_GLOB_DENYLIST, "NAMES", 0,
+ "Don't run tests with name matching the pattern (supports '*' wildcard)." },
{},
};
@@ -491,36 +519,48 @@ static void free_str_set(const struct str_set *set)
free(set->strs);
}
-static int parse_str_list(const char *s, struct str_set *set)
+static int parse_str_list(const char *s, struct str_set *set, bool is_glob_pattern)
{
char *input, *state = NULL, *next, **tmp, **strs = NULL;
- int cnt = 0;
+ int i, cnt = 0;
input = strdup(s);
if (!input)
return -ENOMEM;
- set->cnt = 0;
- set->strs = NULL;
-
while ((next = strtok_r(state ? NULL : input, ",", &state))) {
tmp = realloc(strs, sizeof(*strs) * (cnt + 1));
if (!tmp)
goto err;
strs = tmp;
- strs[cnt] = strdup(next);
- if (!strs[cnt])
- goto err;
+ if (is_glob_pattern) {
+ strs[cnt] = strdup(next);
+ if (!strs[cnt])
+ goto err;
+ } else {
+ strs[cnt] = malloc(strlen(next) + 2 + 1);
+ if (!strs[cnt])
+ goto err;
+ sprintf(strs[cnt], "*%s*", next);
+ }
cnt++;
}
- set->cnt = cnt;
- set->strs = (const char **)strs;
+ tmp = realloc(set->strs, sizeof(*strs) * (cnt + set->cnt));
+ if (!tmp)
+ goto err;
+ memcpy(tmp + set->cnt, strs, sizeof(*strs) * cnt);
+ set->strs = (const char **)tmp;
+ set->cnt += cnt;
+
free(input);
+ free(strs);
return 0;
err:
+ for (i = 0; i < cnt; i++)
+ free(strs[i]);
free(strs);
free(input);
return -ENOMEM;
@@ -553,29 +593,35 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
}
break;
}
+ case ARG_TEST_NAME_GLOB_ALLOWLIST:
case ARG_TEST_NAME: {
char *subtest_str = strchr(arg, '/');
if (subtest_str) {
*subtest_str = '\0';
if (parse_str_list(subtest_str + 1,
- &env->subtest_selector.whitelist))
+ &env->subtest_selector.whitelist,
+ key == ARG_TEST_NAME_GLOB_ALLOWLIST))
return -ENOMEM;
}
- if (parse_str_list(arg, &env->test_selector.whitelist))
+ if (parse_str_list(arg, &env->test_selector.whitelist,
+ key == ARG_TEST_NAME_GLOB_ALLOWLIST))
return -ENOMEM;
break;
}
+ case ARG_TEST_NAME_GLOB_DENYLIST:
case ARG_TEST_NAME_BLACKLIST: {
char *subtest_str = strchr(arg, '/');
if (subtest_str) {
*subtest_str = '\0';
if (parse_str_list(subtest_str + 1,
- &env->subtest_selector.blacklist))
+ &env->subtest_selector.blacklist,
+ key == ARG_TEST_NAME_GLOB_DENYLIST))
return -ENOMEM;
}
- if (parse_str_list(arg, &env->test_selector.blacklist))
+ if (parse_str_list(arg, &env->test_selector.blacklist,
+ key == ARG_TEST_NAME_GLOB_DENYLIST))
return -ENOMEM;
break;
}
@@ -737,6 +783,9 @@ int main(int argc, char **argv)
if (err)
return err;
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
libbpf_set_print(libbpf_print_fn);
srand(time(NULL));
@@ -752,7 +801,7 @@ int main(int argc, char **argv)
save_netns();
stdio_hijack();
env.has_testmod = true;
- if (load_bpf_testmod()) {
+ if (!env.list_test_names && load_bpf_testmod()) {
fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
env.has_testmod = false;
}
@@ -783,24 +832,25 @@ int main(int argc, char **argv)
test__end_subtest();
test->tested = true;
- if (test->error_cnt)
- env.fail_cnt++;
- else
- env.succ_cnt++;
- skip_account();
dump_test_log(test, test->error_cnt);
fprintf(env.stdout, "#%d %s:%s\n",
test->test_num, test->test_name,
- test->error_cnt ? "FAIL" : "OK");
+ test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+
+ if (test->error_cnt)
+ env.fail_cnt++;
+ else
+ env.succ_cnt++;
+ skip_account();
reset_affinity();
restore_netns();
if (test->need_cgroup_cleanup)
cleanup_cgroup_environment();
}
- if (env.has_testmod)
+ if (!env.list_test_names && env.has_testmod)
unload_bpf_testmod();
stdio_restore();
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index dda52cb649dc..c8c2bf878f67 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -221,6 +221,18 @@ extern int test__join_cgroup(const char *path);
___ok; \
})
+#define ASSERT_STRNEQ(actual, expected, len, name) ({ \
+ static int duration = 0; \
+ const char *___act = actual; \
+ const char *___exp = expected; \
+ int ___len = len; \
+ bool ___ok = strncmp(___act, ___exp, ___len) == 0; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual '%.*s' != expected '%.*s'\n", \
+ (name), ___len, ___act, ___len, ___exp); \
+ ___ok; \
+})
+
#define ASSERT_OK(res, name) ({ \
static int duration = 0; \
long long ___res = (res); \
@@ -249,16 +261,17 @@ extern int test__join_cgroup(const char *path);
#define ASSERT_OK_PTR(ptr, name) ({ \
static int duration = 0; \
const void *___res = (ptr); \
- bool ___ok = !IS_ERR_OR_NULL(___res); \
- CHECK(!___ok, (name), \
- "unexpected error: %ld\n", PTR_ERR(___res)); \
+ int ___err = libbpf_get_error(___res); \
+ bool ___ok = ___err == 0; \
+ CHECK(!___ok, (name), "unexpected error: %d\n", ___err); \
___ok; \
})
#define ASSERT_ERR_PTR(ptr, name) ({ \
static int duration = 0; \
const void *___res = (ptr); \
- bool ___ok = IS_ERR(___res); \
+ int ___err = libbpf_get_error(___res); \
+ bool ___ok = ___err != 0; \
CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \
___ok; \
})
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index c9dde9b9d987..088fcad138c9 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -69,7 +69,7 @@ cleanup() {
}
server_listen() {
- ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" &
+ ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" &
server_pid=$!
sleep 0.2
}
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 73da7fe8c152..4a39304cc5a6 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -82,6 +82,8 @@ int main(int argc, char **argv)
cpu_set_t cpuset;
__u32 key = 0;
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
CPU_ZERO(&cpuset);
CPU_SET(0, &cpuset);
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
@@ -116,7 +118,7 @@ int main(int argc, char **argv)
pb_opts.sample_cb = dummyfn;
pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts);
- if (IS_ERR(pb))
+ if (!pb)
goto err;
pthread_create(&tid, NULL, poller_thread, pb);
@@ -163,7 +165,6 @@ err:
bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
close(cg_fd);
cleanup_cgroup_environment();
- if (!IS_ERR_OR_NULL(pb))
- perf_buffer__free(pb);
+ perf_buffer__free(pb);
return error;
}
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
new file mode 100755
index 000000000000..1538373157e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
@@ -0,0 +1,204 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test topology:
+# - - - - - - - - - - - - - - - - - - - - - - - - -
+# | veth1 veth2 veth3 | ... init net
+# - -| - - - - - - | - - - - - - | - -
+# --------- --------- ---------
+# | veth0 | | veth0 | | veth0 | ...
+# --------- --------- ---------
+# ns1 ns2 ns3
+#
+# Test modules:
+# XDP modes: generic, native, native + egress_prog
+#
+# Test cases:
+# ARP: Testing BPF_F_BROADCAST, the ingress interface also should receive
+# the redirects.
+# ns1 -> gw: ns1, ns2, ns3, should receive the arp request
+# IPv4: Testing BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, the ingress
+# interface should not receive the redirects.
+# ns1 -> gw: ns1 should not receive, ns2, ns3 should receive redirects.
+# IPv6: Testing none flag, all the pkts should be redirected back
+# ping test: ns1 -> ns2 (block), echo requests will be redirect back
+# egress_prog:
+# all src mac should be egress interface's mac
+
+# netns numbers
+NUM=3
+IFACES=""
+DRV_MODE="xdpgeneric xdpdrv xdpegress"
+PASS=0
+FAIL=0
+
+test_pass()
+{
+ echo "Pass: $@"
+ PASS=$((PASS + 1))
+}
+
+test_fail()
+{
+ echo "fail: $@"
+ FAIL=$((FAIL + 1))
+}
+
+clean_up()
+{
+ for i in $(seq $NUM); do
+ ip link del veth$i 2> /dev/null
+ ip netns del ns$i 2> /dev/null
+ done
+}
+
+# Kselftest framework requirement - SKIP code is 4.
+check_env()
+{
+ ip link set dev lo xdpgeneric off &>/dev/null
+ if [ $? -ne 0 ];then
+ echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
+ exit 4
+ fi
+
+ which tcpdump &>/dev/null
+ if [ $? -ne 0 ];then
+ echo "selftests: [SKIP] Could not run test without tcpdump"
+ exit 4
+ fi
+}
+
+setup_ns()
+{
+ local mode=$1
+ IFACES=""
+
+ if [ "$mode" = "xdpegress" ]; then
+ mode="xdpdrv"
+ fi
+
+ for i in $(seq $NUM); do
+ ip netns add ns$i
+ ip link add veth$i type veth peer name veth0 netns ns$i
+ ip link set veth$i up
+ ip -n ns$i link set veth0 up
+
+ ip -n ns$i addr add 192.0.2.$i/24 dev veth0
+ ip -n ns$i addr add 2001:db8::$i/64 dev veth0
+ # Add a neigh entry for IPv4 ping test
+ ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
+ ip -n ns$i link set veth0 $mode obj \
+ xdp_dummy.o sec xdp_dummy &> /dev/null || \
+ { test_fail "Unable to load dummy xdp" && exit 1; }
+ IFACES="$IFACES veth$i"
+ veth_mac[$i]=$(ip link show veth$i | awk '/link\/ether/ {print $2}')
+ done
+}
+
+do_egress_tests()
+{
+ local mode=$1
+
+ # mac test
+ ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> mac_ns1-2_${mode}.log &
+ ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> mac_ns1-3_${mode}.log &
+ sleep 0.5
+ ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+ sleep 0.5
+ pkill -9 tcpdump
+
+ # mac check
+ grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" mac_ns1-2_${mode}.log && \
+ test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2"
+ grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" mac_ns1-3_${mode}.log && \
+ test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3"
+}
+
+do_ping_tests()
+{
+ local mode=$1
+
+ # ping6 test: echo request should be redirect back to itself, not others
+ ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
+
+ ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ns1-1_${mode}.log &
+ ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ns1-2_${mode}.log &
+ ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ns1-3_${mode}.log &
+ sleep 0.5
+ # ARP test
+ ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+ # IPv4 test
+ ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
+ # IPv6 test
+ ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
+ sleep 0.5
+ pkill -9 tcpdump
+
+ # All netns should receive the redirect arp requests
+ [ $(grep -c "who-has 192.0.2.254" ns1-1_${mode}.log) -gt 4 ] && \
+ test_pass "$mode arp(F_BROADCAST) ns1-1" || \
+ test_fail "$mode arp(F_BROADCAST) ns1-1"
+ [ $(grep -c "who-has 192.0.2.254" ns1-2_${mode}.log) -le 4 ] && \
+ test_pass "$mode arp(F_BROADCAST) ns1-2" || \
+ test_fail "$mode arp(F_BROADCAST) ns1-2"
+ [ $(grep -c "who-has 192.0.2.254" ns1-3_${mode}.log) -le 4 ] && \
+ test_pass "$mode arp(F_BROADCAST) ns1-3" || \
+ test_fail "$mode arp(F_BROADCAST) ns1-3"
+
+ # ns1 should not receive the redirect echo request, others should
+ [ $(grep -c "ICMP echo request" ns1-1_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \
+ test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1"
+ [ $(grep -c "ICMP echo request" ns1-2_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \
+ test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2"
+ [ $(grep -c "ICMP echo request" ns1-3_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \
+ test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3"
+
+ # ns1 should receive the echo request, ns2 should not
+ [ $(grep -c "ICMP6, echo request" ns1-1_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv6 (no flags) ns1-1" || \
+ test_fail "$mode IPv6 (no flags) ns1-1"
+ [ $(grep -c "ICMP6, echo request" ns1-2_${mode}.log) -eq 0 ] && \
+ test_pass "$mode IPv6 (no flags) ns1-2" || \
+ test_fail "$mode IPv6 (no flags) ns1-2"
+}
+
+do_tests()
+{
+ local mode=$1
+ local drv_p
+
+ case ${mode} in
+ xdpdrv) drv_p="-N";;
+ xdpegress) drv_p="-X";;
+ xdpgeneric) drv_p="-S";;
+ esac
+
+ ./xdp_redirect_multi $drv_p $IFACES &> xdp_redirect_${mode}.log &
+ xdp_pid=$!
+ sleep 1
+
+ if [ "$mode" = "xdpegress" ]; then
+ do_egress_tests $mode
+ else
+ do_ping_tests $mode
+ fi
+
+ kill $xdp_pid
+}
+
+trap clean_up 0 2 3 6 9
+
+check_env
+rm -f xdp_redirect_*.log ns*.log mac_ns*.log
+
+for mode in ${DRV_MODE}; do
+ setup_ns $mode
+ do_tests $mode
+ clean_up
+done
+
+echo "Summary: PASS $PASS, FAIL $FAIL"
+[ $FAIL -eq 0 ] && exit 0 || exit 1
diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
index ba8ffcdaac30..995278e684b6 100755
--- a/tools/testing/selftests/bpf/test_xdp_veth.sh
+++ b/tools/testing/selftests/bpf/test_xdp_veth.sh
@@ -108,7 +108,7 @@ ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
-ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx
+ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp
ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
trap cleanup EXIT
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 46633a3bfb0b..cd7bf32e6a17 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -63,14 +63,11 @@
# ----------------
# Must run with CAP_NET_ADMIN capability.
#
-# Run (full color-coded output):
-# sudo ./test_xsk.sh -c
+# Run:
+# sudo ./test_xsk.sh
#
# If running from kselftests:
-# sudo make colorconsole=1 run_tests
-#
-# Run (full output without color-coding):
-# sudo ./test_xsk.sh
+# sudo make run_tests
#
# Run with verbose output:
# sudo ./test_xsk.sh -v
@@ -83,7 +80,6 @@
while getopts "cvD" flag
do
case "${flag}" in
- c) colorconsole=1;;
v) verbose=1;;
D) dump_pkts=1;;
esac
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 1bbd1d9830c8..e7a19b04d4ea 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -136,3 +136,90 @@ void read_trace_pipe(void)
}
}
}
+
+#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
+
+#define OP_RT_RA_MASK 0xffff0000UL
+#define LIS_R2 0x3c400000UL
+#define ADDIS_R2_R12 0x3c4c0000UL
+#define ADDI_R2_R2 0x38420000UL
+
+ssize_t get_uprobe_offset(const void *addr, ssize_t base)
+{
+ u32 *insn = (u32 *)(uintptr_t)addr;
+
+ /*
+ * A PPC64 ABIv2 function may have a local and a global entry
+ * point. We need to use the local entry point when patching
+ * functions, so identify and step over the global entry point
+ * sequence.
+ *
+ * The global entry point sequence is always of the form:
+ *
+ * addis r2,r12,XXXX
+ * addi r2,r2,XXXX
+ *
+ * A linker optimisation may convert the addis to lis:
+ *
+ * lis r2,XXXX
+ * addi r2,r2,XXXX
+ */
+ if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+ ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+ ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+ return (ssize_t)(insn + 2) - base;
+ else
+ return (uintptr_t)addr - base;
+}
+
+#else
+
+ssize_t get_uprobe_offset(const void *addr, ssize_t base)
+{
+ return (uintptr_t)addr - base;
+}
+
+#endif
+
+ssize_t get_base_addr(void)
+{
+ size_t start, offset;
+ char buf[256];
+ FILE *f;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -errno;
+
+ while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
+ &start, buf, &offset) == 3) {
+ if (strcmp(buf, "r-xp") == 0) {
+ fclose(f);
+ return start - offset;
+ }
+ }
+
+ fclose(f);
+ return -EINVAL;
+}
+
+ssize_t get_rel_offset(uintptr_t addr)
+{
+ size_t start, end, offset;
+ char buf[256];
+ FILE *f;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -errno;
+
+ while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &offset) == 4) {
+ if (addr >= start && addr < end) {
+ fclose(f);
+ return (size_t)addr - start + offset;
+ }
+ }
+
+ fclose(f);
+ return -EINVAL;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index f62fdef9e589..d907b445524d 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -18,4 +18,8 @@ int kallsyms_find(const char *sym, unsigned long long *addr);
void read_trace_pipe(void);
+ssize_t get_uprobe_offset(const void *addr, ssize_t base);
+ssize_t get_base_addr(void);
+ssize_t get_rel_offset(uintptr_t addr);
+
#endif
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index 2c8935b3e65d..ee454327e5c6 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -159,3 +159,15 @@
.result = ACCEPT,
.retval = 2,
},
+{
+ "dead code: zero extension",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 0,
+},
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index a3e593ddfafc..2debba4e8a3a 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -1,4 +1,233 @@
{
+ "map access: known scalar += value_ptr unknown vs const",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 5 },
+ .fixup_map_array_48b = { 8 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "map access: known scalar += value_ptr const vs unknown",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 5 },
+ .fixup_map_array_48b = { 8 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "map access: known scalar += value_ptr const vs const (ne)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 5),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 5 },
+ .fixup_map_array_48b = { 8 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "map access: known scalar += value_ptr const vs const (eq)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 5),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 5),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 5 },
+ .fixup_map_array_48b = { 8 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "map access: known scalar += value_ptr unknown vs unknown (eq)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 5 },
+ .fixup_map_array_48b = { 8 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "map access: known scalar += value_ptr unknown vs unknown (lt)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 5 },
+ .fixup_map_array_48b = { 8 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "map access: known scalar += value_ptr unknown vs unknown (gt)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_1, 6),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_16b = { 5 },
+ .fixup_map_array_48b = { 8 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 tried to add from different maps, paths or scalars",
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
"map access: known scalar += value_ptr from different maps",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
new file mode 100644
index 000000000000..3696a8f32c23
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <net/if.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <sys/resource.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#define MAX_IFACE_NUM 32
+#define MAX_INDEX_NUM 1024
+
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int ifaces[MAX_IFACE_NUM] = {};
+
+static void int_exit(int sig)
+{
+ __u32 prog_id = 0;
+ int i;
+
+ for (i = 0; ifaces[i] > 0; i++) {
+ if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
+ printf("bpf_get_link_xdp_id failed\n");
+ exit(1);
+ }
+ if (prog_id)
+ bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
+ }
+
+ exit(0);
+}
+
+static int get_mac_addr(unsigned int ifindex, void *mac_addr)
+{
+ char ifname[IF_NAMESIZE];
+ struct ifreq ifr;
+ int fd, ret = -1;
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ return ret;
+
+ if (!if_indextoname(ifindex, ifname))
+ goto err_out;
+
+ strcpy(ifr.ifr_name, ifname);
+
+ if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
+ goto err_out;
+
+ memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
+ ret = 0;
+
+err_out:
+ close(fd);
+ return ret;
+}
+
+static void usage(const char *prog)
+{
+ fprintf(stderr,
+ "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
+ "OPTS:\n"
+ " -S use skb-mode\n"
+ " -N enforce native mode\n"
+ " -F force loading prog\n"
+ " -X load xdp program on egress\n",
+ prog);
+}
+
+int main(int argc, char **argv)
+{
+ int prog_fd, group_all, mac_map;
+ struct bpf_program *ingress_prog, *egress_prog;
+ struct bpf_prog_load_attr prog_load_attr = {
+ .prog_type = BPF_PROG_TYPE_UNSPEC,
+ };
+ int i, ret, opt, egress_prog_fd = 0;
+ struct bpf_devmap_val devmap_val;
+ bool attach_egress_prog = false;
+ unsigned char mac_addr[6];
+ char ifname[IF_NAMESIZE];
+ struct bpf_object *obj;
+ unsigned int ifindex;
+ char filename[256];
+
+ while ((opt = getopt(argc, argv, "SNFX")) != -1) {
+ switch (opt) {
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case 'N':
+ /* default, set below */
+ break;
+ case 'F':
+ xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ break;
+ case 'X':
+ attach_egress_prog = true;
+ break;
+ default:
+ usage(basename(argv[0]));
+ return 1;
+ }
+ }
+
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+ } else if (attach_egress_prog) {
+ printf("Load xdp program on egress with SKB mode not supported yet\n");
+ goto err_out;
+ }
+
+ if (optind == argc) {
+ printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
+ goto err_out;
+ }
+
+ printf("Get interfaces");
+ for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
+ ifaces[i] = if_nametoindex(argv[optind + i]);
+ if (!ifaces[i])
+ ifaces[i] = strtoul(argv[optind + i], NULL, 0);
+ if (!if_indextoname(ifaces[i], ifname)) {
+ perror("Invalid interface name or i");
+ goto err_out;
+ }
+ if (ifaces[i] > MAX_INDEX_NUM) {
+ printf("Interface index to large\n");
+ goto err_out;
+ }
+ printf(" %d", ifaces[i]);
+ }
+ printf("\n");
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ prog_load_attr.file = filename;
+
+ if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+ goto err_out;
+
+ if (attach_egress_prog)
+ group_all = bpf_object__find_map_fd_by_name(obj, "map_egress");
+ else
+ group_all = bpf_object__find_map_fd_by_name(obj, "map_all");
+ mac_map = bpf_object__find_map_fd_by_name(obj, "mac_map");
+
+ if (group_all < 0 || mac_map < 0) {
+ printf("bpf_object__find_map_fd_by_name failed\n");
+ goto err_out;
+ }
+
+ if (attach_egress_prog) {
+ /* Find ingress/egress prog for 2nd xdp prog */
+ ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_all_prog");
+ egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
+ if (!ingress_prog || !egress_prog) {
+ printf("finding ingress/egress_prog in obj file failed\n");
+ goto err_out;
+ }
+ prog_fd = bpf_program__fd(ingress_prog);
+ egress_prog_fd = bpf_program__fd(egress_prog);
+ if (prog_fd < 0 || egress_prog_fd < 0) {
+ printf("find egress_prog fd failed\n");
+ goto err_out;
+ }
+ }
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ /* Init forward multicast groups and exclude group */
+ for (i = 0; ifaces[i] > 0; i++) {
+ ifindex = ifaces[i];
+
+ if (attach_egress_prog) {
+ ret = get_mac_addr(ifindex, mac_addr);
+ if (ret < 0) {
+ printf("get interface %d mac failed\n", ifindex);
+ goto err_out;
+ }
+ ret = bpf_map_update_elem(mac_map, &ifindex, mac_addr, 0);
+ if (ret) {
+ perror("bpf_update_elem mac_map failed\n");
+ goto err_out;
+ }
+ }
+
+ /* Add all the interfaces to group all */
+ devmap_val.ifindex = ifindex;
+ devmap_val.bpf_prog.fd = egress_prog_fd;
+ ret = bpf_map_update_elem(group_all, &ifindex, &devmap_val, 0);
+ if (ret) {
+ perror("bpf_map_update_elem");
+ goto err_out;
+ }
+
+ /* bind prog_fd to each interface */
+ ret = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
+ if (ret) {
+ printf("Set xdp fd failed on %d\n", ifindex);
+ goto err_out;
+ }
+ }
+
+ /* sleep some time for testing */
+ sleep(999);
+
+ return 0;
+
+err_out:
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 1135fb980814..f53ce2683f8d 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -70,7 +70,6 @@
#include <errno.h>
#include <getopt.h>
#include <asm/barrier.h>
-typedef __u16 __sum16;
#include <linux/if_link.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
@@ -106,14 +105,9 @@ static const u16 UDP_PORT2 = 2121;
static void __exit_with_error(int error, const char *file, const char *func, int line)
{
- if (configured_mode == TEST_MODE_UNCONFIGURED) {
- ksft_exit_fail_msg
- ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
- } else {
- ksft_test_result_fail
- ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
- ksft_exit_xfail();
- }
+ ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
+ strerror(error));
+ ksft_exit_xfail();
}
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
@@ -126,7 +120,7 @@ static void __exit_with_error(int error, const char *file, const char *func, int
test_type == TEST_TYPE_STATS ? "Stats" : "",\
test_type == TEST_TYPE_BPF_RES ? "BPF RES" : ""))
-static void *memset32_htonl(void *dest, u32 val, u32 size)
+static void memset32_htonl(void *dest, u32 val, u32 size)
{
u32 *ptr = (u32 *)dest;
int i;
@@ -135,11 +129,6 @@ static void *memset32_htonl(void *dest, u32 val, u32 size)
for (i = 0; i < (size & (~0x3)); i += 4)
ptr[i >> 2] = val;
-
- for (; i < size; i++)
- ((char *)dest)[i] = ((char *)&val)[i & 3];
-
- return dest;
}
/*
@@ -230,13 +219,13 @@ static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr)
ip_hdr->check = 0;
}
-static void gen_udp_hdr(struct generic_data *data, struct ifobject *ifobject,
+static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject,
struct udphdr *udp_hdr)
{
udp_hdr->source = htons(ifobject->src_port);
udp_hdr->dest = htons(ifobject->dst_port);
udp_hdr->len = htons(UDP_PKT_SIZE);
- memset32_htonl(pkt_data + PKT_HDR_SIZE, htonl(data->seqnum), UDP_PKT_DATA_SIZE);
+ memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE);
}
static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
@@ -246,12 +235,7 @@ static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
}
-static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
-{
- memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE);
-}
-
-static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
+static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size, int idx)
{
struct xsk_umem_config cfg = {
.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
@@ -260,7 +244,6 @@ static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
.frame_headroom = frame_headroom,
.flags = XSK_UMEM__DEFAULT_FLAGS
};
- int size = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
struct xsk_umem_info *umem;
int ret;
@@ -271,7 +254,7 @@ static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
ret = xsk_umem__create(&umem->umem, buffer, size,
&umem->fq, &umem->cq, &cfg);
if (ret)
- exit_with_error(ret);
+ exit_with_error(-ret);
umem->buffer = buffer;
@@ -285,7 +268,7 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
- exit_with_error(ret);
+ exit_with_error(-ret);
for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
*xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
@@ -333,20 +316,19 @@ static struct option long_options[] = {
{"queue", optional_argument, 0, 'q'},
{"dump-pkts", optional_argument, 0, 'D'},
{"verbose", no_argument, 0, 'v'},
- {"tx-pkt-count", optional_argument, 0, 'C'},
{0, 0, 0, 0}
};
static void usage(const char *prog)
{
const char *str =
- " Usage: %s [OPTIONS]\n"
- " Options:\n"
- " -i, --interface Use interface\n"
- " -q, --queue=n Use queue n (default 0)\n"
- " -D, --dump-pkts Dump packets L2 - L5\n"
- " -v, --verbose Verbose output\n"
- " -C, --tx-pkt-count=n Number of packets to send\n";
+ " Usage: %s [OPTIONS]\n"
+ " Options:\n"
+ " -i, --interface Use interface\n"
+ " -q, --queue=n Use queue n (default 0)\n"
+ " -D, --dump-pkts Dump packets L2 - L5\n"
+ " -v, --verbose Verbose output\n";
+
ksft_print_msg(str, prog);
}
@@ -392,7 +374,7 @@ static void parse_command_line(int argc, char **argv)
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "i:DC:v", long_options, &option_index);
+ c = getopt_long(argc, argv, "i:Dv", long_options, &option_index);
if (c == -1)
break;
@@ -413,13 +395,10 @@ static void parse_command_line(int argc, char **argv)
interface_index++;
break;
case 'D':
- debug_pkt_dump = 1;
- break;
- case 'C':
- opt_pkt_count = atoi(optarg);
+ opt_pkt_dump = true;
break;
case 'v':
- opt_verbose = 1;
+ opt_verbose = true;
break;
default:
usage(basename(argv[0]));
@@ -427,17 +406,143 @@ static void parse_command_line(int argc, char **argv)
}
}
- if (!opt_pkt_count) {
- print_verbose("No tx-pkt-count specified, using default %u\n", DEFAULT_PKT_CNT);
- opt_pkt_count = DEFAULT_PKT_CNT;
- }
-
if (!validate_interfaces()) {
usage(basename(argv[0]));
ksft_exit_xfail();
}
}
+static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
+{
+ if (pkt_nb >= pkt_stream->nb_pkts)
+ return NULL;
+
+ return &pkt_stream->pkts[pkt_nb];
+}
+
+static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ pkt_stream = malloc(sizeof(*pkt_stream));
+ if (!pkt_stream)
+ exit_with_error(ENOMEM);
+
+ pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
+ if (!pkt_stream->pkts)
+ exit_with_error(ENOMEM);
+
+ pkt_stream->nb_pkts = nb_pkts;
+ for (i = 0; i < nb_pkts; i++) {
+ pkt_stream->pkts[i].addr = (i % num_frames) * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ pkt_stream->pkts[i].len = pkt_len;
+ pkt_stream->pkts[i].payload = i;
+ }
+
+ return pkt_stream;
+}
+
+static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
+{
+ struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb);
+ struct udphdr *udp_hdr;
+ struct ethhdr *eth_hdr;
+ struct iphdr *ip_hdr;
+ void *data;
+
+ if (!pkt)
+ return NULL;
+
+ data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr);
+ udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr));
+ ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr));
+ eth_hdr = (struct ethhdr *)data;
+
+ gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr);
+ gen_ip_hdr(ifobject, ip_hdr);
+ gen_udp_csum(udp_hdr, ip_hdr);
+ gen_eth_hdr(ifobject, eth_hdr);
+
+ return pkt;
+}
+
+static void pkt_dump(void *pkt, u32 len)
+{
+ char s[INET_ADDRSTRLEN];
+ struct ethhdr *ethhdr;
+ struct udphdr *udphdr;
+ struct iphdr *iphdr;
+ int payload, i;
+
+ ethhdr = pkt;
+ iphdr = pkt + sizeof(*ethhdr);
+ udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr);
+
+ /*extract L2 frame */
+ fprintf(stdout, "DEBUG>> L2: dst mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ fprintf(stdout, "%02X", ethhdr->h_dest[i]);
+
+ fprintf(stdout, "\nDEBUG>> L2: src mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ fprintf(stdout, "%02X", ethhdr->h_source[i]);
+
+ /*extract L3 frame */
+ fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
+ inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
+ inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
+ /*extract L4 frame */
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
+ /*extract L5 frame */
+ payload = *((uint32_t *)(pkt + PKT_HDR_SIZE));
+
+ fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
+ fprintf(stdout, "---------------------------------------\n");
+}
+
+static bool is_pkt_valid(struct pkt *pkt, void *buffer, const struct xdp_desc *desc)
+{
+ void *data = xsk_umem__get_data(buffer, desc->addr);
+ struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr));
+
+ if (!pkt) {
+ ksft_test_result_fail("ERROR: [%s] too many packets received\n", __func__);
+ return false;
+ }
+
+ if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
+ u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
+
+ if (opt_pkt_dump && test_type != TEST_TYPE_STATS)
+ pkt_dump(data, PKT_SIZE);
+
+ if (pkt->len != desc->len) {
+ ksft_test_result_fail
+ ("ERROR: [%s] expected length [%d], got length [%d]\n",
+ __func__, pkt->len, desc->len);
+ return false;
+ }
+
+ if (pkt->payload != seqnum) {
+ ksft_test_result_fail
+ ("ERROR: [%s] expected seqnum [%d], got seqnum [%d]\n",
+ __func__, pkt->payload, seqnum);
+ return false;
+ }
+ } else {
+ ksft_print_msg("Invalid frame received: ");
+ ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
+ iphdr->tos);
+ return false;
+ }
+
+ return true;
+}
+
static void kick_tx(struct xsk_socket_info *xsk)
{
int ret;
@@ -448,7 +553,7 @@ static void kick_tx(struct xsk_socket_info *xsk)
exit_with_error(errno);
}
-static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
+static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
{
unsigned int rcvd;
u32 idx;
@@ -463,133 +568,108 @@ static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
if (rcvd) {
xsk_ring_cons__release(&xsk->umem->cq, rcvd);
xsk->outstanding_tx -= rcvd;
- xsk->tx_npkts += rcvd;
}
}
-static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds)
+static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *xsk,
+ struct pollfd *fds)
{
- unsigned int rcvd, i;
- u32 idx_rx = 0, idx_fq = 0;
+ u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkt_count = 0;
+ struct pkt *pkt;
int ret;
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
- if (!rcvd) {
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
- ret = poll(fds, 1, POLL_TMOUT);
- if (ret < 0)
- exit_with_error(ret);
+ pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
+ while (pkt) {
+ rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ if (!rcvd) {
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ ret = poll(fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ exit_with_error(-ret);
+ }
+ continue;
}
- return;
- }
- ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
- while (ret != rcvd) {
- if (ret < 0)
- exit_with_error(ret);
- if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
- ret = poll(fds, 1, POLL_TMOUT);
+ ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+ while (ret != rcvd) {
if (ret < 0)
- exit_with_error(ret);
+ exit_with_error(-ret);
+ if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+ ret = poll(fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ exit_with_error(-ret);
+ }
+ ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
}
- ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
- }
-
- for (i = 0; i < rcvd; i++) {
- u64 addr, orig;
-
- addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr;
- xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
- orig = xsk_umem__extract_addr(addr);
- addr = xsk_umem__add_offset_to_addr(addr);
- pkt_node_rx = malloc(sizeof(struct pkt) + PKT_SIZE);
- if (!pkt_node_rx)
- exit_with_error(errno);
+ for (i = 0; i < rcvd; i++) {
+ const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
+ u64 addr = desc->addr, orig;
- pkt_node_rx->pkt_frame = malloc(PKT_SIZE);
- if (!pkt_node_rx->pkt_frame)
- exit_with_error(errno);
+ orig = xsk_umem__extract_addr(addr);
+ addr = xsk_umem__add_offset_to_addr(addr);
+ if (!is_pkt_valid(pkt, xsk->umem->buffer, desc))
+ return;
- memcpy(pkt_node_rx->pkt_frame, xsk_umem__get_data(xsk->umem->buffer, addr),
- PKT_SIZE);
-
- TAILQ_INSERT_HEAD(&head, pkt_node_rx, pkt_nodes);
+ *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
+ pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
+ }
- *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
+ xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
+ xsk_ring_cons__release(&xsk->rx, rcvd);
}
-
- xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
- xsk_ring_cons__release(&xsk->rx, rcvd);
- xsk->rx_npkts += rcvd;
}
-static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
+static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
{
- u32 idx = 0;
- unsigned int i;
- bool tx_invalid_test = stat_test_type == STAT_TEST_TX_INVALID;
- u32 len = tx_invalid_test ? XSK_UMEM__DEFAULT_FRAME_SIZE + 1 : PKT_SIZE;
+ struct xsk_socket_info *xsk = ifobject->xsk;
+ u32 i, idx;
- while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size)
- complete_tx_only(xsk, batch_size);
+ while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE)
+ complete_pkts(xsk, BATCH_SIZE);
- for (i = 0; i < batch_size; i++) {
+ for (i = 0; i < BATCH_SIZE; i++) {
struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+ struct pkt *pkt = pkt_generate(ifobject, pkt_nb);
- tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
- tx_desc->len = len;
- }
+ if (!pkt)
+ break;
- xsk_ring_prod__submit(&xsk->tx, batch_size);
- if (!tx_invalid_test) {
- xsk->outstanding_tx += batch_size;
- } else if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
- kick_tx(xsk);
+ tx_desc->addr = pkt->addr;
+ tx_desc->len = pkt->len;
+ pkt_nb++;
}
- *frameptr += batch_size;
- *frameptr %= num_frames;
- complete_tx_only(xsk, batch_size);
-}
-
-static int get_batch_size(int pkt_cnt)
-{
- if (!opt_pkt_count)
- return BATCH_SIZE;
- if (pkt_cnt + BATCH_SIZE <= opt_pkt_count)
- return BATCH_SIZE;
+ xsk_ring_prod__submit(&xsk->tx, i);
+ if (stat_test_type != STAT_TEST_TX_INVALID)
+ xsk->outstanding_tx += i;
+ else if (xsk_ring_prod__needs_wakeup(&xsk->tx))
+ kick_tx(xsk);
+ complete_pkts(xsk, i);
- return opt_pkt_count - pkt_cnt;
+ return i;
}
-static void complete_tx_only_all(struct ifobject *ifobject)
+static void wait_for_tx_completion(struct xsk_socket_info *xsk)
{
- bool pending;
-
- do {
- pending = false;
- if (ifobject->xsk->outstanding_tx) {
- complete_tx_only(ifobject->xsk, BATCH_SIZE);
- pending = !!ifobject->xsk->outstanding_tx;
- }
- } while (pending);
+ while (xsk->outstanding_tx)
+ complete_pkts(xsk, BATCH_SIZE);
}
-static void tx_only_all(struct ifobject *ifobject)
+static void send_pkts(struct ifobject *ifobject)
{
struct pollfd fds[MAX_SOCKS] = { };
- u32 frame_nb = 0;
- int pkt_cnt = 0;
- int ret;
+ u32 pkt_cnt = 0;
fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
fds[0].events = POLLOUT;
- while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
- int batch_size = get_batch_size(pkt_cnt);
+ while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
+ u32 sent;
if (test_type == TEST_TYPE_POLL) {
+ int ret;
+
ret = poll(fds, 1, POLL_TMOUT);
if (ret <= 0)
continue;
@@ -598,78 +678,30 @@ static void tx_only_all(struct ifobject *ifobject)
continue;
}
- tx_only(ifobject->xsk, &frame_nb, batch_size);
- pkt_cnt += batch_size;
+ sent = __send_pkts(ifobject, pkt_cnt);
+ pkt_cnt += sent;
+ usleep(10);
}
- if (opt_pkt_count)
- complete_tx_only_all(ifobject);
+ wait_for_tx_completion(ifobject->xsk);
}
-static void worker_pkt_dump(void)
-{
- struct ethhdr *ethhdr;
- struct iphdr *iphdr;
- struct udphdr *udphdr;
- char s[128];
- int payload;
- void *ptr;
-
- fprintf(stdout, "---------------------------------------\n");
- for (int iter = 0; iter < num_frames - 1; iter++) {
- ptr = pkt_buf[iter]->payload;
- ethhdr = ptr;
- iphdr = ptr + sizeof(*ethhdr);
- udphdr = ptr + sizeof(*ethhdr) + sizeof(*iphdr);
-
- /*extract L2 frame */
- fprintf(stdout, "DEBUG>> L2: dst mac: ");
- for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ethhdr->h_dest[i]);
-
- fprintf(stdout, "\nDEBUG>> L2: src mac: ");
- for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ethhdr->h_source[i]);
-
- /*extract L3 frame */
- fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
- fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
- inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
- fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
- inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
- /*extract L4 frame */
- fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
- fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
- /*extract L5 frame */
- payload = *((uint32_t *)(ptr + PKT_HDR_SIZE));
-
- if (payload == EOT) {
- print_verbose("End-of-transmission frame received\n");
- fprintf(stdout, "---------------------------------------\n");
- break;
- }
- fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload);
- fprintf(stdout, "---------------------------------------\n");
- }
-}
-
-static void worker_stats_validate(struct ifobject *ifobject)
+static bool rx_stats_are_valid(struct ifobject *ifobject)
{
+ u32 xsk_stat = 0, expected_stat = ifobject->pkt_stream->nb_pkts;
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ int fd = xsk_socket__fd(xsk);
struct xdp_statistics stats;
socklen_t optlen;
int err;
- struct xsk_socket *xsk = stat_test_type == STAT_TEST_TX_INVALID ?
- ifdict[!ifobject->ifdict_index]->xsk->xsk :
- ifobject->xsk->xsk;
- int fd = xsk_socket__fd(xsk);
- unsigned long xsk_stat = 0, expected_stat = opt_pkt_count;
-
- sigvar = 0;
optlen = sizeof(stats);
err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
- if (err)
- return;
+ if (err) {
+ ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return true;
+ }
if (optlen == sizeof(struct xdp_statistics)) {
switch (stat_test_type) {
@@ -677,8 +709,7 @@ static void worker_stats_validate(struct ifobject *ifobject)
xsk_stat = stats.rx_dropped;
break;
case STAT_TEST_TX_INVALID:
- xsk_stat = stats.tx_invalid_descs;
- break;
+ return true;
case STAT_TEST_RX_FULL:
xsk_stat = stats.rx_ring_full;
expected_stat -= RX_FULL_RXQSIZE;
@@ -691,99 +722,70 @@ static void worker_stats_validate(struct ifobject *ifobject)
}
if (xsk_stat == expected_stat)
- sigvar = 1;
+ return true;
}
+
+ return false;
}
-static void worker_pkt_validate(void)
+static void tx_stats_validate(struct ifobject *ifobject)
{
- u32 payloadseqnum = -2;
- struct iphdr *iphdr;
-
- while (1) {
- pkt_node_rx_q = TAILQ_LAST(&head, head_s);
- if (!pkt_node_rx_q)
- break;
-
- iphdr = (struct iphdr *)(pkt_node_rx_q->pkt_frame + sizeof(struct ethhdr));
-
- /*do not increment pktcounter if !(tos=0x9 and ipv4) */
- if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
- payloadseqnum = *((uint32_t *)(pkt_node_rx_q->pkt_frame + PKT_HDR_SIZE));
- if (debug_pkt_dump && payloadseqnum != EOT) {
- pkt_obj = malloc(sizeof(*pkt_obj));
- pkt_obj->payload = malloc(PKT_SIZE);
- memcpy(pkt_obj->payload, pkt_node_rx_q->pkt_frame, PKT_SIZE);
- pkt_buf[payloadseqnum] = pkt_obj;
- }
-
- if (payloadseqnum == EOT) {
- print_verbose("End-of-transmission frame received: PASS\n");
- sigvar = 1;
- break;
- }
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ int fd = xsk_socket__fd(xsk);
+ struct xdp_statistics stats;
+ socklen_t optlen;
+ int err;
- if (prev_pkt + 1 != payloadseqnum) {
- ksft_test_result_fail
- ("ERROR: [%s] prev_pkt [%d], payloadseqnum [%d]\n",
- __func__, prev_pkt, payloadseqnum);
- ksft_exit_xfail();
- }
+ optlen = sizeof(stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+ if (err) {
+ ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return;
+ }
- prev_pkt = payloadseqnum;
- pkt_counter++;
- } else {
- ksft_print_msg("Invalid frame received: ");
- ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version,
- iphdr->tos);
- }
+ if (stats.tx_invalid_descs == ifobject->pkt_stream->nb_pkts)
+ return;
- TAILQ_REMOVE(&head, pkt_node_rx_q, pkt_nodes);
- free(pkt_node_rx_q->pkt_frame);
- free(pkt_node_rx_q);
- pkt_node_rx_q = NULL;
- }
+ ksft_test_result_fail("ERROR: [%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
+ __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts);
}
static void thread_common_ops(struct ifobject *ifobject, void *bufs)
{
- int umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ u64 umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+ size_t mmap_sz = umem_sz;
int ctr = 0;
int ret;
ifobject->ns_fd = switch_namespace(ifobject->nsname);
if (test_type == TEST_TYPE_BPF_RES)
- umem_sz *= 2;
+ mmap_sz *= 2;
- bufs = mmap(NULL, umem_sz,
- PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
if (bufs == MAP_FAILED)
exit_with_error(errno);
- xsk_configure_umem(ifobject, bufs, 0);
- ifobject->umem = ifobject->umem_arr[0];
- ret = xsk_configure_socket(ifobject, 0);
-
- /* Retry Create Socket if it fails as xsk_socket__create()
- * is asynchronous
- */
- while (ret && ctr < SOCK_RECONF_CTR) {
- xsk_configure_umem(ifobject, bufs, 0);
+ while (ctr++ < SOCK_RECONF_CTR) {
+ xsk_configure_umem(ifobject, bufs, umem_sz, 0);
ifobject->umem = ifobject->umem_arr[0];
ret = xsk_configure_socket(ifobject, 0);
+ if (!ret)
+ break;
+
+ /* Retry Create Socket if it fails as xsk_socket__create() is asynchronous */
usleep(USLEEP_MAX);
- ctr++;
+ if (ctr >= SOCK_RECONF_CTR)
+ exit_with_error(-ret);
}
- if (ctr >= SOCK_RECONF_CTR)
- exit_with_error(ret);
-
ifobject->umem = ifobject->umem_arr[0];
ifobject->xsk = ifobject->xsk_arr[0];
if (test_type == TEST_TYPE_BPF_RES) {
- xsk_configure_umem(ifobject, (u8 *)bufs + (umem_sz / 2), 1);
+ xsk_configure_umem(ifobject, (u8 *)bufs + umem_sz, umem_sz, 1);
ifobject->umem = ifobject->umem_arr[1];
ret = xsk_configure_socket(ifobject, 1);
}
@@ -809,33 +811,18 @@ static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
static void *worker_testapp_validate_tx(void *arg)
{
- struct udphdr *udp_hdr =
- (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr));
- struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr));
- struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
struct ifobject *ifobject = (struct ifobject *)arg;
- struct generic_data data;
void *bufs = NULL;
if (!second_step)
thread_common_ops(ifobject, bufs);
- for (int i = 0; i < num_frames; i++) {
- /*send EOT frame */
- if (i == (num_frames - 1))
- data.seqnum = -1;
- else
- data.seqnum = i;
- gen_udp_hdr(&data, ifobject, udp_hdr);
- gen_ip_hdr(ifobject, ip_hdr);
- gen_udp_csum(udp_hdr, ip_hdr);
- gen_eth_hdr(ifobject, eth_hdr);
- gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
- }
+ print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
+ ifobject->ifname);
+ send_pkts(ifobject);
- print_verbose("Sending %d packets on interface %s\n",
- (opt_pkt_count - 1), ifobject->ifname);
- tx_only_all(ifobject);
+ if (stat_test_type == STAT_TEST_TX_INVALID)
+ tx_stats_validate(ifobject);
testapp_cleanup_xsk_res(ifobject);
pthread_exit(NULL);
@@ -853,31 +840,16 @@ static void *worker_testapp_validate_rx(void *arg)
if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
xsk_populate_fill_ring(ifobject->umem);
- TAILQ_INIT(&head);
- if (debug_pkt_dump) {
- pkt_buf = calloc(num_frames, sizeof(*pkt_buf));
- if (!pkt_buf)
- exit_with_error(errno);
- }
-
fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
fds[0].events = POLLIN;
pthread_barrier_wait(&barr);
- while (1) {
- if (test_type != TEST_TYPE_STATS) {
- rx_pkt(ifobject->xsk, fds);
- worker_pkt_validate();
- } else {
- worker_stats_validate(ifobject);
- }
- if (sigvar)
- break;
- }
-
- print_verbose("Received %d packets on interface %s\n",
- pkt_counter, ifobject->ifname);
+ if (test_type == TEST_TYPE_STATS)
+ while (!rx_stats_are_valid(ifobject))
+ continue;
+ else
+ receive_pkts(ifobject->pkt_stream, ifobject->xsk, fds);
if (test_type == TEST_TYPE_TEARDOWN)
print_verbose("Destroying socket\n");
@@ -890,10 +862,18 @@ static void testapp_validate(void)
{
bool bidi = test_type == TEST_TYPE_BIDI;
bool bpf = test_type == TEST_TYPE_BPF_RES;
+ struct pkt_stream *pkt_stream;
if (pthread_barrier_init(&barr, NULL, 2))
exit_with_error(errno);
+ if (stat_test_type == STAT_TEST_TX_INVALID)
+ pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
+ else
+ pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, PKT_SIZE);
+ ifdict_tx->pkt_stream = pkt_stream;
+ ifdict_rx->pkt_stream = pkt_stream;
+
/*Spawn RX thread */
pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx);
@@ -907,15 +887,6 @@ static void testapp_validate(void)
pthread_join(t1, NULL);
pthread_join(t0, NULL);
- if (debug_pkt_dump && test_type != TEST_TYPE_STATS) {
- worker_pkt_dump();
- for (int iter = 0; iter < num_frames - 1; iter++) {
- free(pkt_buf[iter]->payload);
- free(pkt_buf[iter]);
- }
- free(pkt_buf);
- }
-
if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS))
print_ksft_result();
}
@@ -925,9 +896,6 @@ static void testapp_teardown(void)
int i;
for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
- pkt_counter = 0;
- prev_pkt = -1;
- sigvar = 0;
print_verbose("Creating socket\n");
testapp_validate();
}
@@ -953,9 +921,6 @@ static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2)
static void testapp_bidi(void)
{
for (int i = 0; i < MAX_BIDI_ITER; i++) {
- pkt_counter = 0;
- prev_pkt = -1;
- sigvar = 0;
print_verbose("Creating socket\n");
testapp_validate();
if (!second_step) {
@@ -987,9 +952,6 @@ static void testapp_bpf_res(void)
int i;
for (i = 0; i < MAX_BPF_ITER; i++) {
- pkt_counter = 0;
- prev_pkt = -1;
- sigvar = 0;
print_verbose("Creating socket\n");
testapp_validate();
if (!second_step)
@@ -1017,6 +979,8 @@ static void testapp_stats(void)
case STAT_TEST_RX_FULL:
rxqsize = RX_FULL_RXQSIZE;
break;
+ case STAT_TEST_TX_INVALID:
+ continue;
default:
break;
}
@@ -1062,10 +1026,7 @@ static void run_pkt_test(int mode, int type)
/* reset defaults after potential previous test */
xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
- pkt_counter = 0;
second_step = 0;
- prev_pkt = -1;
- sigvar = 0;
stat_test_type = -1;
rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
@@ -1102,62 +1063,70 @@ static void run_pkt_test(int mode, int type)
}
}
+static struct ifobject *ifobject_create(void)
+{
+ struct ifobject *ifobj;
+
+ ifobj = calloc(1, sizeof(struct ifobject));
+ if (!ifobj)
+ return NULL;
+
+ ifobj->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
+ if (!ifobj->xsk_arr)
+ goto out_xsk_arr;
+
+ ifobj->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
+ if (!ifobj->umem_arr)
+ goto out_umem_arr;
+
+ return ifobj;
+
+out_umem_arr:
+ free(ifobj->xsk_arr);
+out_xsk_arr:
+ free(ifobj);
+ return NULL;
+}
+
+static void ifobject_delete(struct ifobject *ifobj)
+{
+ free(ifobj->umem_arr);
+ free(ifobj->xsk_arr);
+ free(ifobj);
+}
+
int main(int argc, char **argv)
{
struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
- bool failure = false;
int i, j;
if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
exit_with_error(errno);
- for (int i = 0; i < MAX_INTERFACES; i++) {
- ifdict[i] = malloc(sizeof(struct ifobject));
+ for (i = 0; i < MAX_INTERFACES; i++) {
+ ifdict[i] = ifobject_create();
if (!ifdict[i])
- exit_with_error(errno);
-
- ifdict[i]->ifdict_index = i;
- ifdict[i]->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
- if (!ifdict[i]->xsk_arr) {
- failure = true;
- goto cleanup;
- }
- ifdict[i]->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
- if (!ifdict[i]->umem_arr) {
- failure = true;
- goto cleanup;
- }
+ exit_with_error(ENOMEM);
}
setlocale(LC_ALL, "");
parse_command_line(argc, argv);
- num_frames = ++opt_pkt_count;
-
- init_iface(ifdict[0], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
- init_iface(ifdict[1], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
+ init_iface(ifdict[tx], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
+ init_iface(ifdict[rx], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
- for (i = 0; i < TEST_MODE_MAX; i++) {
- for (j = 0; j < TEST_TYPE_MAX; j++)
+ for (i = 0; i < TEST_MODE_MAX; i++)
+ for (j = 0; j < TEST_TYPE_MAX; j++) {
run_pkt_test(i, j);
- }
-
-cleanup:
- for (int i = 0; i < MAX_INTERFACES; i++) {
- if (ifdict[i]->ns_fd != -1)
- close(ifdict[i]->ns_fd);
- free(ifdict[i]->xsk_arr);
- free(ifdict[i]->umem_arr);
- free(ifdict[i]);
- }
+ usleep(USLEEP_MAX);
+ }
- if (failure)
- exit_with_error(errno);
+ for (i = 0; i < MAX_INTERFACES; i++)
+ ifobject_delete(ifdict[i]);
ksft_exit_pass();
-
return 0;
}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 6c428b276ab6..7e49b9fbe25e 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -34,28 +34,23 @@
#define IP_PKT_TOS 0x9
#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
-#define EOT (-1)
-#define USLEEP_MAX 200000
+#define USLEEP_MAX 10000
#define SOCK_RECONF_CTR 10
-#define BATCH_SIZE 64
+#define BATCH_SIZE 8
#define POLL_TMOUT 1000
-#define DEFAULT_PKT_CNT 10000
+#define DEFAULT_PKT_CNT (4 * 1024)
#define RX_FULL_RXQSIZE 32
+#define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
-typedef __u32 u32;
-typedef __u16 u16;
-typedef __u8 u8;
-
-enum TEST_MODES {
- TEST_MODE_UNCONFIGURED = -1,
+enum test_mode {
TEST_MODE_SKB,
TEST_MODE_DRV,
TEST_MODE_MAX
};
-enum TEST_TYPES {
+enum test_type {
TEST_TYPE_NOPOLL,
TEST_TYPE_POLL,
TEST_TYPE_TEARDOWN,
@@ -65,7 +60,7 @@ enum TEST_TYPES {
TEST_TYPE_MAX
};
-enum STAT_TEST_TYPES {
+enum stat_test_type {
STAT_TEST_RX_DROPPED,
STAT_TEST_TX_INVALID,
STAT_TEST_RX_FULL,
@@ -73,21 +68,16 @@ enum STAT_TEST_TYPES {
STAT_TEST_TYPE_MAX
};
-static int configured_mode = TEST_MODE_UNCONFIGURED;
-static u8 debug_pkt_dump;
-static u32 num_frames;
+static int configured_mode;
+static bool opt_pkt_dump;
+static u32 num_frames = DEFAULT_PKT_CNT / 4;
static bool second_step;
static int test_type;
-static int opt_pkt_count;
-static u8 opt_verbose;
+static bool opt_verbose;
static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
-static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
-static u32 pkt_counter;
-static long prev_pkt = -1;
-static int sigvar;
static int stat_test_type;
static u32 rxqsize;
static u32 frame_headroom;
@@ -104,10 +94,6 @@ struct xsk_socket_info {
struct xsk_ring_prod tx;
struct xsk_umem_info *umem;
struct xsk_socket *xsk;
- unsigned long rx_npkts;
- unsigned long tx_npkts;
- unsigned long prev_rx_npkts;
- unsigned long prev_tx_npkts;
u32 outstanding_tx;
};
@@ -118,8 +104,15 @@ struct flow_vector {
} vector;
};
-struct generic_data {
- u32 seqnum;
+struct pkt {
+ u64 addr;
+ u32 len;
+ u32 payload;
+};
+
+struct pkt_stream {
+ u32 nb_pkts;
+ struct pkt *pkts;
};
struct ifobject {
@@ -131,8 +124,8 @@ struct ifobject {
struct xsk_umem_info *umem;
void *(*func_ptr)(void *arg);
struct flow_vector fv;
+ struct pkt_stream *pkt_stream;
int ns_fd;
- int ifdict_index;
u32 dst_ip;
u32 src_ip;
u16 src_port;
@@ -149,18 +142,4 @@ static struct ifobject *ifdict_tx;
pthread_barrier_t barr;
pthread_t t0, t1;
-TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
-struct head_s *head_p;
-struct pkt {
- char *pkt_frame;
-
- TAILQ_ENTRY(pkt) pkt_nodes;
-} *pkt_node_rx, *pkt_node_rx_q;
-
-struct pkt_frame {
- char *payload;
-} *pkt_obj;
-
-struct pkt_frame **pkt_buf;
-
#endif /* XDPXCEIVER_H */
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index dac1c5f78752..bf29d2549bee 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -8,14 +8,8 @@ ksft_xfail=2
ksft_xpass=3
ksft_skip=4
-GREEN='\033[0;92m'
-YELLOW='\033[0;93m'
-RED='\033[0;31m'
-NC='\033[0m'
-STACK_LIM=131072
SPECFILE=veth.spec
XSKOBJ=xdpxceiver
-NUMPKTS=10000
validate_root_exec()
{
@@ -50,22 +44,12 @@ validate_veth_spec_file()
test_status()
{
statusval=$1
- if [ -n "${colorconsole+set}" ]; then
- if [ $statusval -eq 2 ]; then
- echo -e "${YELLOW}$2${NC}: [ ${RED}FAIL${NC} ]"
- elif [ $statusval -eq 1 ]; then
- echo -e "${YELLOW}$2${NC}: [ ${RED}SKIPPED${NC} ]"
- elif [ $statusval -eq 0 ]; then
- echo -e "${YELLOW}$2${NC}: [ ${GREEN}PASS${NC} ]"
- fi
- else
- if [ $statusval -eq 2 ]; then
- echo -e "$2: [ FAIL ]"
- elif [ $statusval -eq 1 ]; then
- echo -e "$2: [ SKIPPED ]"
- elif [ $statusval -eq 0 ]; then
- echo -e "$2: [ PASS ]"
- fi
+ if [ $statusval -eq 2 ]; then
+ echo -e "$2: [ FAIL ]"
+ elif [ $statusval -eq 1 ]; then
+ echo -e "$2: [ SKIPPED ]"
+ elif [ $statusval -eq 0 ]; then
+ echo -e "$2: [ PASS ]"
fi
}
@@ -107,5 +91,5 @@ validate_ip_utility()
execxdpxceiver()
{
- ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} -C ${NUMPKTS} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
+ ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
}
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index 84cfcabea838..be9643ef6285 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -2,4 +2,5 @@
test_memcontrol
test_core
test_freezer
-test_kmem \ No newline at end of file
+test_kmem
+test_kill
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index f027d933595b..59e222460581 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -9,6 +9,7 @@ TEST_GEN_PROGS = test_memcontrol
TEST_GEN_PROGS += test_kmem
TEST_GEN_PROGS += test_core
TEST_GEN_PROGS += test_freezer
+TEST_GEN_PROGS += test_kill
include ../lib.mk
@@ -16,3 +17,4 @@ $(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h
$(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h
$(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h
$(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h
+$(OUTPUT)/test_kill: cgroup_util.c ../clone3/clone3_selftests.h ../pidfd/pidfd.h
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 027014662fb2..623cec04ad42 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -5,10 +5,12 @@
#include <errno.h>
#include <fcntl.h>
#include <linux/limits.h>
+#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/inotify.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
@@ -252,6 +254,10 @@ int cg_killall(const char *cgroup)
char buf[PAGE_SIZE];
char *ptr = buf;
+ /* If cgroup.kill exists use it. */
+ if (!cg_write(cgroup, "cgroup.kill", "1"))
+ return 0;
+
if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
return -1;
@@ -576,3 +582,48 @@ int clone_into_cgroup_run_wait(const char *cgroup)
(void)clone_reap(pid, WEXITED);
return 0;
}
+
+int cg_prepare_for_wait(const char *cgroup)
+{
+ int fd, ret = -1;
+
+ fd = inotify_init1(0);
+ if (fd == -1)
+ return fd;
+
+ ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"),
+ IN_MODIFY);
+ if (ret == -1) {
+ close(fd);
+ fd = -1;
+ }
+
+ return fd;
+}
+
+int cg_wait_for(int fd)
+{
+ int ret = -1;
+ struct pollfd fds = {
+ .fd = fd,
+ .events = POLLIN,
+ };
+
+ while (true) {
+ ret = poll(&fds, 1, 10000);
+
+ if (ret == -1) {
+ if (errno == EINTR)
+ continue;
+
+ break;
+ }
+
+ if (ret > 0 && fds.revents & POLLIN) {
+ ret = 0;
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 5a1305dd1f0b..82e59cdf16e7 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -54,3 +54,5 @@ extern pid_t clone_into_cgroup(int cgroup_fd);
extern int clone_reap(pid_t pid, int options);
extern int clone_into_cgroup_run_wait(const char *cgroup);
extern int dirfd_open_opath(const char *dir);
+extern int cg_prepare_for_wait(const char *cgroup);
+extern int cg_wait_for(int fd);
diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c
index 23d8fa4a3e4e..ff519029f6f4 100644
--- a/tools/testing/selftests/cgroup/test_freezer.c
+++ b/tools/testing/selftests/cgroup/test_freezer.c
@@ -7,9 +7,7 @@
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
-#include <poll.h>
#include <stdlib.h>
-#include <sys/inotify.h>
#include <string.h>
#include <sys/wait.h>
@@ -55,61 +53,6 @@ static int cg_freeze_nowait(const char *cgroup, bool freeze)
}
/*
- * Prepare for waiting on cgroup.events file.
- */
-static int cg_prepare_for_wait(const char *cgroup)
-{
- int fd, ret = -1;
-
- fd = inotify_init1(0);
- if (fd == -1) {
- debug("Error: inotify_init1() failed\n");
- return fd;
- }
-
- ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"),
- IN_MODIFY);
- if (ret == -1) {
- debug("Error: inotify_add_watch() failed\n");
- close(fd);
- fd = -1;
- }
-
- return fd;
-}
-
-/*
- * Wait for an event. If there are no events for 10 seconds,
- * treat this an error.
- */
-static int cg_wait_for(int fd)
-{
- int ret = -1;
- struct pollfd fds = {
- .fd = fd,
- .events = POLLIN,
- };
-
- while (true) {
- ret = poll(&fds, 1, 10000);
-
- if (ret == -1) {
- if (errno == EINTR)
- continue;
- debug("Error: poll() failed\n");
- break;
- }
-
- if (ret > 0 && fds.revents & POLLIN) {
- ret = 0;
- break;
- }
- }
-
- return ret;
-}
-
-/*
* Attach a task to the given cgroup and wait for a cgroup frozen event.
* All transient events (e.g. populated) are ignored.
*/
diff --git a/tools/testing/selftests/cgroup/test_kill.c b/tools/testing/selftests/cgroup/test_kill.c
new file mode 100644
index 000000000000..6153690319c9
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_kill.c
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <errno.h>
+#include <linux/limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+#include "../pidfd/pidfd.h"
+#include "cgroup_util.h"
+
+/*
+ * Kill the given cgroup and wait for the inotify signal.
+ * If there are no events in 10 seconds, treat this as an error.
+ * Then check that the cgroup is in the desired state.
+ */
+static int cg_kill_wait(const char *cgroup)
+{
+ int fd, ret = -1;
+
+ fd = cg_prepare_for_wait(cgroup);
+ if (fd < 0)
+ return fd;
+
+ ret = cg_write(cgroup, "cgroup.kill", "1");
+ if (ret)
+ goto out;
+
+ ret = cg_wait_for(fd);
+ if (ret)
+ goto out;
+
+out:
+ close(fd);
+ return ret;
+}
+
+/*
+ * A simple process running in a sleep loop until being
+ * re-parented.
+ */
+static int child_fn(const char *cgroup, void *arg)
+{
+ int ppid = getppid();
+
+ while (getppid() == ppid)
+ usleep(1000);
+
+ return getppid() == ppid;
+}
+
+static int test_cgkill_simple(const char *root)
+{
+ pid_t pids[100];
+ int ret = KSFT_FAIL;
+ char *cgroup = NULL;
+ int i;
+
+ cgroup = cg_name(root, "cg_test_simple");
+ if (!cgroup)
+ goto cleanup;
+
+ if (cg_create(cgroup))
+ goto cleanup;
+
+ for (i = 0; i < 100; i++)
+ pids[i] = cg_run_nowait(cgroup, child_fn, NULL);
+
+ if (cg_wait_for_proc_count(cgroup, 100))
+ goto cleanup;
+
+ if (cg_read_strcmp(cgroup, "cgroup.events", "populated 1\n"))
+ goto cleanup;
+
+ if (cg_kill_wait(cgroup))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (i = 0; i < 100; i++)
+ wait_for_pid(pids[i]);
+
+ if (ret == KSFT_PASS &&
+ cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n"))
+ ret = KSFT_FAIL;
+
+ if (cgroup)
+ cg_destroy(cgroup);
+ free(cgroup);
+ return ret;
+}
+
+/*
+ * The test creates the following hierarchy:
+ * A
+ * / / \ \
+ * B E I K
+ * /\ |
+ * C D F
+ * |
+ * G
+ * |
+ * H
+ *
+ * with a process in C, H and 3 processes in K.
+ * Then it tries to kill the whole tree.
+ */
+static int test_cgkill_tree(const char *root)
+{
+ pid_t pids[5];
+ char *cgroup[10] = {0};
+ int ret = KSFT_FAIL;
+ int i;
+
+ cgroup[0] = cg_name(root, "cg_test_tree_A");
+ if (!cgroup[0])
+ goto cleanup;
+
+ cgroup[1] = cg_name(cgroup[0], "B");
+ if (!cgroup[1])
+ goto cleanup;
+
+ cgroup[2] = cg_name(cgroup[1], "C");
+ if (!cgroup[2])
+ goto cleanup;
+
+ cgroup[3] = cg_name(cgroup[1], "D");
+ if (!cgroup[3])
+ goto cleanup;
+
+ cgroup[4] = cg_name(cgroup[0], "E");
+ if (!cgroup[4])
+ goto cleanup;
+
+ cgroup[5] = cg_name(cgroup[4], "F");
+ if (!cgroup[5])
+ goto cleanup;
+
+ cgroup[6] = cg_name(cgroup[5], "G");
+ if (!cgroup[6])
+ goto cleanup;
+
+ cgroup[7] = cg_name(cgroup[6], "H");
+ if (!cgroup[7])
+ goto cleanup;
+
+ cgroup[8] = cg_name(cgroup[0], "I");
+ if (!cgroup[8])
+ goto cleanup;
+
+ cgroup[9] = cg_name(cgroup[0], "K");
+ if (!cgroup[9])
+ goto cleanup;
+
+ for (i = 0; i < 10; i++)
+ if (cg_create(cgroup[i]))
+ goto cleanup;
+
+ pids[0] = cg_run_nowait(cgroup[2], child_fn, NULL);
+ pids[1] = cg_run_nowait(cgroup[7], child_fn, NULL);
+ pids[2] = cg_run_nowait(cgroup[9], child_fn, NULL);
+ pids[3] = cg_run_nowait(cgroup[9], child_fn, NULL);
+ pids[4] = cg_run_nowait(cgroup[9], child_fn, NULL);
+
+ /*
+ * Wait until all child processes will enter
+ * corresponding cgroups.
+ */
+
+ if (cg_wait_for_proc_count(cgroup[2], 1) ||
+ cg_wait_for_proc_count(cgroup[7], 1) ||
+ cg_wait_for_proc_count(cgroup[9], 3))
+ goto cleanup;
+
+ /*
+ * Kill A and check that we get an empty notification.
+ */
+ if (cg_kill_wait(cgroup[0]))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (i = 0; i < 5; i++)
+ wait_for_pid(pids[i]);
+
+ if (ret == KSFT_PASS &&
+ cg_read_strcmp(cgroup[0], "cgroup.events", "populated 0\n"))
+ ret = KSFT_FAIL;
+
+ for (i = 9; i >= 0 && cgroup[i]; i--) {
+ cg_destroy(cgroup[i]);
+ free(cgroup[i]);
+ }
+
+ return ret;
+}
+
+static int forkbomb_fn(const char *cgroup, void *arg)
+{
+ int ppid;
+
+ fork();
+ fork();
+
+ ppid = getppid();
+
+ while (getppid() == ppid)
+ usleep(1000);
+
+ return getppid() == ppid;
+}
+
+/*
+ * The test runs a fork bomb in a cgroup and tries to kill it.
+ */
+static int test_cgkill_forkbomb(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cgroup = NULL;
+ pid_t pid = -ESRCH;
+
+ cgroup = cg_name(root, "cg_forkbomb_test");
+ if (!cgroup)
+ goto cleanup;
+
+ if (cg_create(cgroup))
+ goto cleanup;
+
+ pid = cg_run_nowait(cgroup, forkbomb_fn, NULL);
+ if (pid < 0)
+ goto cleanup;
+
+ usleep(100000);
+
+ if (cg_kill_wait(cgroup))
+ goto cleanup;
+
+ if (cg_wait_for_proc_count(cgroup, 0))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (pid > 0)
+ wait_for_pid(pid);
+
+ if (ret == KSFT_PASS &&
+ cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n"))
+ ret = KSFT_FAIL;
+
+ if (cgroup)
+ cg_destroy(cgroup);
+ free(cgroup);
+ return ret;
+}
+
+#define T(x) { x, #x }
+struct cgkill_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_cgkill_simple),
+ T(test_cgkill_tree),
+ T(test_cgkill_forkbomb),
+};
+#undef T
+
+int main(int argc, char *argv[])
+{
+ char root[PATH_MAX];
+ int i, ret = EXIT_SUCCESS;
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cpufreq/config b/tools/testing/selftests/cpufreq/config
index 27ff72ebd0f5..75e900793e8a 100644
--- a/tools/testing/selftests/cpufreq/config
+++ b/tools/testing/selftests/cpufreq/config
@@ -6,7 +6,7 @@ CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
CONFIG_DEBUG_RT_MUTEXES=y
-CONFIG_DEBUG_PI_LIST=y
+CONFIG_DEBUG_PLIST=y
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_DEBUG_LOCK_ALLOC=y
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
new file mode 100644
index 000000000000..8a3f2cd9fec0
--- /dev/null
+++ b/tools/testing/selftests/damon/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for damon selftests
+
+TEST_FILES = _chk_dependency.sh
+TEST_PROGS = debugfs_attrs.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/damon/_chk_dependency.sh b/tools/testing/selftests/damon/_chk_dependency.sh
new file mode 100644
index 000000000000..0189db81550b
--- /dev/null
+++ b/tools/testing/selftests/damon/_chk_dependency.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+DBGFS=/sys/kernel/debug/damon
+
+if [ $EUID -ne 0 ];
+then
+ echo "Run as root"
+ exit $ksft_skip
+fi
+
+if [ ! -d "$DBGFS" ]
+then
+ echo "$DBGFS not found"
+ exit $ksft_skip
+fi
+
+for f in attrs target_ids monitor_on
+do
+ if [ ! -f "$DBGFS/$f" ]
+ then
+ echo "$f not found"
+ exit 1
+ fi
+done
diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh
new file mode 100644
index 000000000000..bfabb19dc0d3
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_attrs.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+test_write_result() {
+ file=$1
+ content=$2
+ orig_content=$3
+ expect_reason=$4
+ expected=$5
+
+ echo "$content" > "$file"
+ if [ $? -ne "$expected" ]
+ then
+ echo "writing $content to $file doesn't return $expected"
+ echo "expected because: $expect_reason"
+ echo "$orig_content" > "$file"
+ exit 1
+ fi
+}
+
+test_write_succ() {
+ test_write_result "$1" "$2" "$3" "$4" 0
+}
+
+test_write_fail() {
+ test_write_result "$1" "$2" "$3" "$4" 1
+}
+
+test_content() {
+ file=$1
+ orig_content=$2
+ expected=$3
+ expect_reason=$4
+
+ content=$(cat "$file")
+ if [ "$content" != "$expected" ]
+ then
+ echo "reading $file expected $expected but $content"
+ echo "expected because: $expect_reason"
+ echo "$orig_content" > "$file"
+ exit 1
+ fi
+}
+
+source ./_chk_dependency.sh
+
+# Test attrs file
+# ===============
+
+file="$DBGFS/attrs"
+orig_content=$(cat "$file")
+
+test_write_succ "$file" "1 2 3 4 5" "$orig_content" "valid input"
+test_write_fail "$file" "1 2 3 4" "$orig_content" "no enough fields"
+test_write_fail "$file" "1 2 3 5 4" "$orig_content" \
+ "min_nr_regions > max_nr_regions"
+test_content "$file" "$orig_content" "1 2 3 4 5" "successfully written"
+echo "$orig_content" > "$file"
+
+# Test target_ids file
+# ====================
+
+file="$DBGFS/target_ids"
+orig_content=$(cat "$file")
+
+test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input"
+test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input"
+test_content "$file" "$orig_content" "1 2" "non-integer was there"
+test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input"
+test_content "$file" "$orig_content" "" "wrong input written"
+test_write_succ "$file" "" "$orig_content" "empty input"
+test_content "$file" "$orig_content" "" "empty input written"
+echo "$orig_content" > "$file"
+
+echo "PASS"
diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
index 4de902ea14d8..de1c4e6de0b2 100644
--- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c
+++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
@@ -1,10 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
-#include <linux/fcntl.h>
+#include <fcntl.h>
#include <malloc.h>
#include <sys/ioctl.h>
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
index 4029833f7e27..160891dcb4bc 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -109,6 +109,9 @@ router_destroy()
__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
tc qdisc del dev $rp2 clsact
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
}
setup_prepare()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
index 42d44e27802c..190c1b6b5365 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -111,6 +111,9 @@ router_destroy()
__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
tc qdisc del dev $rp2 clsact
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
}
setup_prepare()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
index 65f43a7ce9c9..1e9a4aff76a2 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
@@ -7,6 +7,8 @@
PORT_NUM_NETIFS=0
+declare -a unsplit
+
port_setup_prepare()
{
:
@@ -20,12 +22,12 @@ port_cleanup()
devlink port unsplit $port
check_err $? "Did not unsplit $netdev"
done
+ unsplit=()
}
split_all_ports()
{
local should_fail=$1; shift
- local -a unsplit
# Loop over the splittable netdevs and create tuples of netdev along
# with its width. For example:
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
index 5cbff8038f84..28a570006d4d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
@@ -93,7 +93,9 @@ switch_destroy()
lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null
lldpad_app_wait_del
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
index 27de3d9ed08e..f4493ef9cca1 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
@@ -29,37 +29,38 @@ cleanup()
get_prio_pg()
{
- __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
- grep buffer | sed 's/ \+/ /g' | cut -d' ' -f 2-
+ # Produces a string of numbers "<B0> <B1> ... <B7> ", where BX is number
+ # of buffer that priority X is mapped to.
+ dcb -j buffer show dev $swp |
+ jq -r '[.prio_buffer | .[] | tostring + " "] | add'
}
get_prio_pfc()
{
- __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
- grep enabled | sed 's/ \+/ /g' | cut -d' ' -f 2-
+ # Produces a string of numbers "<P0> <P1> ... <P7> ", where PX denotes
+ # whether priority X has PFC enabled (the value is 1) or disabled (0).
+ dcb -j pfc show dev $swp |
+ jq -r '[.prio_pfc | .[] | if . then "1 " else "0 " end] | add'
}
get_prio_tc()
{
- __mlnx_qos -i $swp | sed -n '/^tc/,$p' |
- awk '/^tc/ { TC = $2 }
- /priority:/ { PRIO[$2]=TC }
- END {
- for (i in PRIO)
- printf("%d ", PRIO[i])
- }'
+ # Produces a string of numbers "<T0> <T1> ... <T7> ", where TC is number
+ # of TC that priority X is mapped to.
+ dcb -j ets show dev $swp |
+ jq -r '[.prio_tc | .[] | tostring + " "] | add'
}
get_buf_size()
{
local idx=$1; shift
- __mlnx_qos -i $swp | grep Receive | sed 's/.*: //' | cut -d, -f $((idx + 1))
+ dcb -j buffer show dev $swp | jq ".buffer_size[$idx]"
}
get_tot_size()
{
- __mlnx_qos -i $swp | grep Receive | sed 's/.*total_size=//'
+ dcb -j buffer show dev $swp | jq '.total_size'
}
check_prio_pg()
@@ -121,18 +122,18 @@ test_dcb_ets()
{
RET=0
- __mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null
+ dcb ets set dev $swp prio-tc 0:0 1:2 2:4 3:6 4:1 5:3 6:5 7:7
check_prio_pg "0 2 4 6 1 3 5 7 "
check_prio_tc "0 2 4 6 1 3 5 7 "
check_prio_pfc "0 0 0 0 0 0 0 0 "
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+ dcb ets set dev $swp prio-tc all:0
check_prio_pg "0 0 0 0 0 0 0 0 "
check_prio_tc "0 0 0 0 0 0 0 0 "
- __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null
+ dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6 2>/dev/null
check_fail $? "prio2buffer accepted in DCB mode"
log_test "Configuring headroom through ETS"
@@ -174,7 +175,7 @@ test_pfc()
{
RET=0
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null
+ dcb ets set dev $swp prio-tc all:0 5:1 6:2 7:3
local buf0size=$(get_buf_size 0)
local buf1size=$(get_buf_size 1)
@@ -193,7 +194,7 @@ test_pfc()
RET=0
- __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null
+ dcb pfc set dev $swp prio-pfc all:off 5:on 6:on 7:on delay 0
check_prio_pg "0 0 0 0 0 1 2 3 "
check_prio_pfc "0 0 0 0 0 1 1 1 "
@@ -210,7 +211,7 @@ test_pfc()
RET=0
- __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null
+ dcb pfc set dev $swp delay 1000
check_buf_size 0 "== $buf0size"
check_buf_size 1 "> $buf1size"
@@ -221,8 +222,8 @@ test_pfc()
RET=0
- __mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+ dcb pfc set dev $swp prio-pfc all:off delay 0
+ dcb ets set dev $swp prio-tc all:0
check_prio_pg "0 0 0 0 0 0 0 0 "
check_prio_tc "0 0 0 0 0 0 0 0 "
@@ -242,13 +243,13 @@ test_tc_priomap()
{
RET=0
- __mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null
+ dcb ets set dev $swp prio-tc 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
check_prio_pg "0 1 2 3 4 5 6 7 "
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
check_prio_pg "0 0 0 0 0 0 0 0 "
- __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null
+ dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6
check_prio_pg "1 3 5 7 0 2 4 6 "
tc qdisc delete dev $swp root
@@ -256,9 +257,9 @@ test_tc_priomap()
# Clean up.
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
- __mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp prio-buffer all:0
tc qdisc delete dev $swp root
- __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+ dcb ets set dev $swp prio-tc all:0
log_test "TC: priomap"
}
@@ -270,12 +271,12 @@ test_tc_sizes()
RET=0
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null
check_fail $? "buffer_size should fail before qdisc is added"
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
check_err $? "buffer_size should pass after qdisc is added"
check_buf_size 0 "== $size" "set size: "
@@ -283,26 +284,26 @@ test_tc_sizes()
check_buf_size 0 "== $size" "set MTU: "
mtu_restore $swp
- __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0
# After replacing the qdisc for the same kind, buffer_size still has to
# work.
tc qdisc replace dev $swp root handle 1: bfifo limit 1M
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
check_buf_size 0 "== $size" "post replace, set size: "
- __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0
# Likewise after replacing for a different kind.
tc qdisc replace dev $swp root handle 2: prio bands 8
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
check_buf_size 0 "== $size" "post replace different kind, set size: "
tc qdisc delete dev $swp root
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null
check_fail $? "buffer_size should fail after qdisc is deleted"
log_test "TC: buffer size"
@@ -363,10 +364,10 @@ test_tc_int_buf()
tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
test_int_buf "TC: "
- __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0 0:$size
test_int_buf "TC+buffsize: "
- __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+ dcb buffer set dev $swp buffer-size all:0
tc qdisc delete dev $swp root
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
index 0bf76f13c030..faa51012cdac 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
@@ -82,17 +82,3 @@ bail_on_lldpad()
fi
fi
}
-
-__mlnx_qos()
-{
- local err
-
- mlnx_qos "$@" 2>/dev/null
- err=$?
-
- if ((err)); then
- echo "Error ($err) in mlnx_qos $@" >/dev/stderr
- fi
-
- return $err
-}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
index 5c7700212f75..5d5622fc2758 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -171,7 +171,7 @@ switch_create()
# assignment.
tc qdisc replace dev $swp1 root handle 1: \
ets bands 8 strict 8 priomap 7 6
- __mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp1 prio-buffer all:0 1:1
# $swp2
# -----
@@ -209,8 +209,8 @@ switch_create()
# the lossless prio into a buffer of its own. Don't bother with buffer
# sizes though, there is not going to be any pressure in the "backward"
# direction.
- __mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp3 prio-buffer all:0 1:1
+ dcb pfc set dev $swp3 prio-pfc all:off 1:on
# $swp4
# -----
@@ -226,11 +226,11 @@ switch_create()
# Configure qdisc so that we can hand-tune headroom.
tc qdisc replace dev $swp4 root handle 1: \
ets bands 8 strict 8 priomap 7 6
- __mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp4 prio-buffer all:0 1:1
+ dcb pfc set dev $swp4 prio-pfc all:off 1:on
# PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which
# is (-2*MTU) about 80K of delay provision.
- __mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp4 buffer-size all:0 1:$_100KB
# bridges
# -------
@@ -273,9 +273,9 @@ switch_destroy()
# $swp4
# -----
- __mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp4 buffer-size all:0
+ dcb pfc set dev $swp4 prio-pfc all:off
+ dcb buffer set dev $swp4 prio-buffer all:0
tc qdisc del dev $swp4 root
devlink_tc_bind_pool_th_restore $swp4 1 ingress
@@ -288,8 +288,8 @@ switch_destroy()
# $swp3
# -----
- __mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null
- __mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ dcb pfc set dev $swp3 prio-pfc all:off
+ dcb buffer set dev $swp3 prio-buffer all:0
tc qdisc del dev $swp3 root
devlink_tc_bind_pool_th_restore $swp3 1 egress
@@ -315,7 +315,7 @@ switch_destroy()
# $swp1
# -----
- __mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+ dcb buffer set dev $swp1 prio-buffer all:0
tc qdisc del dev $swp1 root
devlink_tc_bind_pool_th_restore $swp1 1 ingress
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
index e93878d42596..683759d29199 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
@@ -68,7 +68,7 @@ wait_for_routes()
local t0=$1; shift
local route_count=$1; shift
- local t1=$(ip route | grep -o 'offload' | wc -l)
+ local t1=$(ip route | grep 'offload' | grep -v 'offload_failed' | wc -l)
local delta=$((t1 - t0))
echo $delta
[[ $delta -ge $route_count ]]
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
index 093bed088ad0..373d5f2a846e 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
@@ -234,15 +234,15 @@ __tc_sample_rate_test()
psample_capture_start
- ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
-B $dip -t udp dp=52768,sp=42768 -q
psample_capture_stop
pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
- pct=$((100 * (pkts - 100) / 100))
+ pct=$((100 * (pkts - 10000) / 10000))
(( -25 <= pct && pct <= 25))
- check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+ check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
log_test "tc sample rate ($desc)"
@@ -587,15 +587,15 @@ __tc_sample_acl_rate_test()
psample_capture_start
- ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
psample_capture_stop
pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
- pct=$((100 * (pkts - 100) / 100))
+ pct=$((100 * (pkts - 10000) / 10000))
(( -25 <= pct && pct <= 25))
- check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+ check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
# Setup a filter that should not match any packet and make sure packets
# are not sampled.
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 40909c254365..9de1d123f4f5 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -5,12 +5,13 @@ lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="fw_flash_test params_test regions_test reload_test \
netns_reload_test resource_test dev_info_test \
- empty_reporter_test dummy_reporter_test"
+ empty_reporter_test dummy_reporter_test rate_test"
NUM_NETIFS=0
source $lib_dir/lib.sh
BUS_ADDR=10
PORT_COUNT=4
+VF_COUNT=4
DEV_NAME=netdevsim$BUS_ADDR
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/
DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/
@@ -507,6 +508,170 @@ dummy_reporter_test()
log_test "dummy reporter test"
}
+rate_leafs_get()
+{
+ local handle=$1
+
+ cmd_jq "devlink port function rate show -j" \
+ '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))'
+}
+
+rate_nodes_get()
+{
+ local handle=$1
+
+ cmd_jq "devlink port function rate show -j" \
+ '.[] | to_entries | .[] | select(.value.type == "node") | .key | select(contains("'$handle'"))'
+}
+
+rate_attr_set()
+{
+ local handle=$1
+ local name=$2
+ local value=$3
+ local units=$4
+
+ devlink port function rate set $handle $name $value$units
+}
+
+rate_attr_get()
+{
+ local handle=$1
+ local name=$2
+
+ cmd_jq "devlink port function rate show $handle -j" '.[][].'$name
+}
+
+rate_attr_tx_rate_check()
+{
+ local handle=$1
+ local name=$2
+ local rate=$3
+ local debug_file=$4
+
+ rate_attr_set $handle $name $rate mbit
+ check_err $? "Failed to set $name value"
+
+ local debug_value=$(cat $debug_file)
+ check_err $? "Failed to read $name value from debugfs"
+ [ "$debug_value" == "$rate" ]
+ check_err $? "Unexpected $name debug value $debug_value != $rate"
+
+ local api_value=$(( $(rate_attr_get $handle $name) * 8 / 1000000 ))
+ check_err $? "Failed to get $name attr value"
+ [ "$api_value" == "$rate" ]
+ check_err $? "Unexpected $name attr value $api_value != $rate"
+}
+
+rate_attr_parent_check()
+{
+ local handle=$1
+ local parent=$2
+ local debug_file=$3
+
+ rate_attr_set $handle parent $parent
+ check_err $? "Failed to set parent"
+
+ debug_value=$(cat $debug_file)
+ check_err $? "Failed to get parent debugfs value"
+ [ "$debug_value" == "$parent" ]
+ check_err $? "Unexpected parent debug value $debug_value != $parent"
+
+ api_value=$(rate_attr_get $r_obj parent)
+ check_err $? "Failed to get parent attr value"
+ [ "$api_value" == "$parent" ]
+ check_err $? "Unexpected parent attr value $api_value != $parent"
+}
+
+rate_node_add()
+{
+ local handle=$1
+
+ devlink port function rate add $handle
+}
+
+rate_node_del()
+{
+ local handle=$1
+
+ devlink port function rate del $handle
+}
+
+rate_test()
+{
+ RET=0
+
+ echo $VF_COUNT > /sys/bus/netdevsim/devices/$DEV_NAME/sriov_numvfs
+ devlink dev eswitch set $DL_HANDLE mode switchdev
+ local leafs=`rate_leafs_get $DL_HANDLE`
+ local num_leafs=`echo $leafs | wc -w`
+ [ "$num_leafs" == "$VF_COUNT" ]
+ check_err $? "Expected $VF_COUNT rate leafs but got $num_leafs"
+
+ rate=10
+ for r_obj in $leafs
+ do
+ rate_attr_tx_rate_check $r_obj tx_share $rate \
+ $DEBUGFS_DIR/ports/${r_obj##*/}/tx_share
+ rate=$(($rate+10))
+ done
+
+ rate=100
+ for r_obj in $leafs
+ do
+ rate_attr_tx_rate_check $r_obj tx_max $rate \
+ $DEBUGFS_DIR/ports/${r_obj##*/}/tx_max
+ rate=$(($rate+100))
+ done
+
+ local node1_name='group1'
+ local node1="$DL_HANDLE/$node1_name"
+ rate_node_add "$node1"
+ check_err $? "Failed to add node $node1"
+
+ local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
+ [ $num_nodes == 1 ]
+ check_err $? "Expected 1 rate node in output but got $num_nodes"
+
+ local node_tx_share=10
+ rate_attr_tx_rate_check $node1 tx_share $node_tx_share \
+ $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_share
+
+ local node_tx_max=100
+ rate_attr_tx_rate_check $node1 tx_max $node_tx_max \
+ $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max
+
+ rate_node_del "$node1"
+ check_err $? "Failed to delete node $node1"
+ local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
+ [ $num_nodes == 0 ]
+ check_err $? "Expected 0 rate node but got $num_nodes"
+
+ local node1_name='group1'
+ local node1="$DL_HANDLE/$node1_name"
+ rate_node_add "$node1"
+ check_err $? "Failed to add node $node1"
+
+ rate_attr_parent_check $r_obj $node1_name \
+ $DEBUGFS_DIR/ports/${r_obj##*/}/rate_parent
+
+ local node2_name='group2'
+ local node2="$DL_HANDLE/$node2_name"
+ rate_node_add "$node2"
+ check_err $? "Failed to add node $node2"
+
+ rate_attr_parent_check $node2 $node1_name \
+ $DEBUGFS_DIR/rate_nodes/$node2_name/rate_parent
+ rate_node_del "$node2"
+ check_err $? "Failed to delete node $node2"
+ rate_attr_set "$r_obj" noparent
+ check_err $? "Failed to unset $r_obj parent node"
+ rate_node_del "$node1"
+ check_err $? "Failed to delete node $node1"
+
+ log_test "rate test"
+}
+
setup_prepare()
{
modprobe netdevsim
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
index da49ad2761b5..109900c817be 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -24,13 +24,15 @@ ALL_TESTS="
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
SLEEP_TIME=1
NETDEV=""
NUM_NETIFS=0
source $lib_dir/lib.sh
+
+DEVLINK_DEV=
source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
require_command udevadm
@@ -163,6 +165,16 @@ trap_stats_test()
devlink_trap_action_set $trap_name "drop"
devlink_trap_stats_idle_test $trap_name
check_err $? "Stats of trap $trap_name not idle when action is drop"
+
+ echo "y"> $DEBUGFS_DIR/fail_trap_drop_counter_get
+ devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null
+ check_fail $? "Managed to read trap (hard dropped) statistics when should not"
+ echo "n"> $DEBUGFS_DIR/fail_trap_drop_counter_get
+ devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null
+ check_err $? "Did not manage to read trap (hard dropped) statistics when should"
+
+ devlink_trap_drop_stats_idle_test $trap_name
+ check_fail $? "Drop stats of trap $trap_name idle when should not"
else
devlink_trap_stats_idle_test $trap_name
check_fail $? "Stats of non-drop trap $trap_name idle when should not"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
index 251f228ce63e..fc794cd30389 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
@@ -33,13 +33,15 @@ ALL_TESTS="
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
NUM_NETIFS=0
source $lib_dir/lib.sh
-source $lib_dir/devlink_lib.sh
source $lib_dir/fib_offload_lib.sh
+DEVLINK_DEV=
+source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
+
ipv4_identical_routes()
{
fib_ipv4_identical_routes_test "testns1"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
index ba75c81cda91..e8e0dc088d6a 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -44,12 +44,14 @@ ALL_TESTS="
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/
NUM_NETIFS=0
source $lib_dir/lib.sh
+
+DEVLINK_DEV=
source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
nexthop_check()
{
diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
index ee10b1a8933c..e689ff7a0b12 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/psample.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
@@ -14,13 +14,15 @@ ALL_TESTS="
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/
CAPTURE_FILE=$(mktemp)
NUM_NETIFS=0
source $lib_dir/lib.sh
+
+DEVLINK_DEV=
source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
# Available at https://github.com/Mellanox/libpsample
require_command psample
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index beee0d5646a6..f7d84549cc3e 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -1,6 +1,6 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Copyright 2020 NXP Semiconductors
+# Copyright 2020 NXP
WAIT_TIME=1
NUM_NETIFS=4
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 477cbb042f5b..0315955ff0f4 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -62,6 +62,9 @@ static int __do_binderfs_test(struct __test_metadata *_metadata)
struct binder_version version = { 0 };
char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX",
device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME];
+ static const char * const binder_features[] = {
+ "oneway_spam_detection",
+ };
change_mountns(_metadata);
@@ -150,6 +153,20 @@ static int __do_binderfs_test(struct __test_metadata *_metadata)
}
/* success: binder-control device removal failed as expected */
+
+ for (int i = 0; i < ARRAY_SIZE(binder_features); i++) {
+ snprintf(device_path, sizeof(device_path), "%s/features/%s",
+ binderfs_mntpt, binder_features[i]);
+ fd = open(device_path, O_CLOEXEC | O_RDONLY);
+ EXPECT_GE(fd, 0) {
+ TH_LOG("%s - Failed to open binder feature: %s",
+ strerror(errno), binder_features[i]);
+ goto umount;
+ }
+ close(fd);
+ }
+
+ /* success: binder feature files found */
result = 0;
umount:
diff --git a/tools/testing/selftests/firmware/fw_namespace.c b/tools/testing/selftests/firmware/fw_namespace.c
index 0e393cb5f42d..4c6f0cd83c5b 100644
--- a/tools/testing/selftests/firmware/fw_namespace.c
+++ b/tools/testing/selftests/firmware/fw_namespace.c
@@ -129,7 +129,8 @@ int main(int argc, char **argv)
die("mounting tmpfs to /lib/firmware failed\n");
sys_path = argv[1];
- asprintf(&fw_path, "/lib/firmware/%s", fw_name);
+ if (asprintf(&fw_path, "/lib/firmware/%s", fw_name) < 0)
+ die("error: failed to build full fw_path\n");
setup_fw(fw_path);
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc
new file mode 100644
index 000000000000..60c02b482be8
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc
@@ -0,0 +1,90 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove eprobe events
+# requires: dynamic_events events/syscalls/sys_enter_openat "e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]":README
+
+echo 0 > events/enable
+
+clear_dynamic_events
+
+SYSTEM="syscalls"
+EVENT="sys_enter_openat"
+FIELD="filename"
+EPROBE="eprobe_open"
+OPTIONS="file=+0(\$filename):ustring"
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+
+echo 1 > events/eprobes/$EPROBE/enable
+ls
+echo 0 > events/eprobes/$EPROBE/enable
+
+content=`grep '^ *ls-' trace | grep 'file='`
+nocontent=`grep '^ *ls-' trace | grep 'file=' | grep -v -e '"/' -e '"."' -e '(fault)' ` || true
+
+if [ -z "$content" ]; then
+ exit_fail
+fi
+
+if [ ! -z "$nocontent" ]; then
+ exit_fail
+fi
+
+echo "-:$EPROBE" >> dynamic_events
+
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# test various ways to remove the probe (already tested with just event name)
+
+# With group name
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With group name and system/event
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE $SYSTEM/$EVENT" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With just event name and system/event
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:$EPROBE $SYSTEM/$EVENT" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With just event name and system/event and options
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With group name and system/event and options
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# Finally make sure what is in the dynamic_events file clears it too
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+LINE=`sed -e '/$EPROBE/s/^e/-/' < dynamic_events`
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
new file mode 100644
index 000000000000..db522577ff78
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
@@ -0,0 +1,38 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - check if duplicate events are caught
+# requires: dynamic_events "e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]":README
+
+echo 0 > events/enable
+
+HAVE_KPROBES=0
+
+if [ -f kprobe_events ]; then
+ HAVE_KPROBES=1
+fi
+
+clear_dynamic_events
+
+# first create dynamic events for eprobes and kprobes.
+
+echo 'e:egroup/eevent syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+
+# Test eprobe for same eprobe, existing kprobe and existing event
+! echo 'e:egroup/eevent syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+! echo 'e:syscalls/sys_enter_open syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+
+if [ $HAVE_KPROBES -eq 1 ]; then
+ echo 'p:kgroup/kevent vfs_open file=+0($arg2)' >> dynamic_events
+ ! echo 'e:kgroup/kevent syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+
+# Test kprobe for same kprobe, existing eprobe and existing event
+ ! echo 'p:kgroup/kevent vfs_open file=+0($arg2)' >> dynamic_events
+ ! echo 'p:egroup/eevent vfs_open file=+0($arg2)' >> dynamic_events
+ ! echo 'p:syscalls/sys_enter_open vfs_open file=+0($arg2)' >> dynamic_events
+
+ echo '-:kgroup/kevent' >> dynamic_events
+fi
+
+echo '-:egroup/eevent' >> dynamic_events
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc
index e6eb78f0b954..9933ed24f901 100644
--- a/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc
@@ -57,6 +57,10 @@ enable_events() {
echo 1 > tracing_on
}
+other_task() {
+ sleep .001 || usleep 1 || sleep 1
+}
+
echo 0 > options/event-fork
do_reset
@@ -94,6 +98,9 @@ child=$!
echo "child = $child"
wait $child
+# Be sure some other events will happen for small systems (e.g. 1 core)
+other_task
+
echo 0 > tracing_on
cnt=`count_pid $mypid`
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index a6fac927ee82..000fd05e84b1 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -83,6 +83,27 @@ clear_synthetic_events() { # reset all current synthetic events
done
}
+clear_dynamic_events() { # reset all current dynamic events
+ again=1
+ stop=1
+ # loop mulitple times as some events require other to be removed first
+ while [ $again -eq 1 ]; do
+ stop=$((stop+1))
+ # Prevent infinite loops
+ if [ $stop -gt 10 ]; then
+ break;
+ fi
+ again=2
+ grep -v '^#' dynamic_events|
+ while read line; do
+ del=`echo $line | sed -e 's/^.\([^ ]*\).*/-\1/'`
+ if ! echo "$del" >> dynamic_events; then
+ again=1
+ fi
+ done
+ done
+}
+
initialize_ftrace() { # Reset ftrace to initial-state
# As the initial state, ftrace will be set to nop tracer,
# no events, no triggers, no filters, no function filters,
@@ -93,6 +114,7 @@ initialize_ftrace() { # Reset ftrace to initial-state
reset_events_filter
reset_ftrace_filter
disable_events
+ clear_dynamic_events
[ -f set_event_pid ] && echo > set_event_pid
[ -f set_ftrace_pid ] && echo > set_ftrace_pid
[ -f set_ftrace_notrace ] && echo > set_ftrace_notrace
@@ -115,7 +137,7 @@ check_requires() { # Check required files and tracers
echo "Required tracer $t is not configured."
exit_unsupported
fi
- elif [ $r != $i ]; then
+ elif [ "$r" != "$i" ]; then
if ! grep -Fq "$r" README ; then
echo "Required feature pattern \"$r\" is not in README."
exit_unsupported
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc
new file mode 100644
index 000000000000..914fe2e5d030
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc
@@ -0,0 +1,53 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test inter-event histogram trigger eprobe on synthetic event
+# requires: dynamic_events synthetic_events events/syscalls/sys_enter_openat/hist "e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]":README
+
+echo 0 > events/enable
+
+clear_dynamic_events
+
+SYSTEM="syscalls"
+START="sys_enter_openat"
+END="sys_exit_openat"
+FIELD="filename"
+SYNTH="synth_open"
+EPROBE="eprobe_open"
+
+echo "$SYNTH u64 filename; s64 ret;" > synthetic_events
+echo "hist:keys=common_pid:__arg__1=$FIELD" > events/$SYSTEM/$START/trigger
+echo "hist:keys=common_pid:filename=\$__arg__1,ret=ret:onmatch($SYSTEM.$START).trace($SYNTH,\$filename,\$ret)" > events/$SYSTEM/$END/trigger
+
+echo "e:$EPROBE synthetic/$SYNTH file=+0(\$filename):ustring ret=\$ret:s64" >> dynamic_events
+
+grep -q "$SYNTH" dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/synthetic/$SYNTH
+test -d events/eprobes/$EPROBE
+
+echo 1 > events/eprobes/$EPROBE/enable
+ls
+echo 0 > events/eprobes/$EPROBE/enable
+
+content=`grep '^ *ls-' trace | grep 'file='`
+nocontent=`grep '^ *ls-' trace | grep 'file=' | grep -v -e '"/' -e '"."'` || true
+
+if [ -z "$content" ]; then
+ exit_fail
+fi
+
+if [ ! -z "$nocontent" ]; then
+ exit_fail
+fi
+
+echo "-:$EPROBE" >> dynamic_events
+echo '!'"hist:keys=common_pid:filename=\$__arg__1,ret=ret:onmatch($SYSTEM.$START).trace($SYNTH,\$filename,\$ret)" > events/$SYSTEM/$END/trigger
+echo '!'"hist:keys=common_pid:__arg__1=$FIELD" > events/$SYSTEM/$START/trigger
+echo '!'"$SYNTH u64 filename; s64 ret;" >> synthetic_events
+
+! grep -q "$SYNTH" dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/synthetic/$SYNTH
+! test -d events/eprobes/$EPROBE
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
index 2950bfbc6fce..adae72665500 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
@@ -39,6 +39,24 @@ grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \
reset_trigger
+echo "Test histogram with sym modifier"
+
+echo 'hist:keys=call_site.sym' > events/kmem/kmalloc/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep '{ call_site: \[[0-9a-f][0-9a-f]*\] [_a-zA-Z][_a-zA-Z]* *}' events/kmem/kmalloc/hist > /dev/null || \
+ fail "sym modifier on kmalloc call_site did not work"
+
+reset_trigger
+
+echo "Test histogram with sym-offset modifier"
+
+echo 'hist:keys=call_site.sym-offset' > events/kmem/kmalloc/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep '{ call_site: \[[0-9a-f][0-9a-f]*\] [_a-zA-Z][_a-zA-Z]*+0x[0-9a-f][0-9a-f]*' events/kmem/kmalloc/hist > /dev/null || \
+ fail "sym-offset modifier on kmalloc call_site did not work"
+
+reset_trigger
+
echo "Test histogram with sort key"
echo 'hist:keys=parent_pid,child_pid:sort=child_pid.ascending' > events/sched/sched_process_fork/trigger
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index 0efcd494daab..0e78b49d0f2f 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -6,3 +6,5 @@ futex_wait_private_mapped_file
futex_wait_timeout
futex_wait_uninitialized_heap
futex_wait_wouldblock
+futex_wait
+futex_requeue
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 23207829ec75..bd1fec59e010 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-INCLUDES := -I../include -I../../
+INCLUDES := -I../include -I../../ -I../../../../../usr/include/ \
+ -I$(KBUILD_OUTPUT)/kselftest/usr/include
CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
LDLIBS := -lpthread -lrt
@@ -14,7 +15,9 @@ TEST_GEN_FILES := \
futex_requeue_pi_signal_restart \
futex_requeue_pi_mismatched_ops \
futex_wait_uninitialized_heap \
- futex_wait_private_mapped_file
+ futex_wait_private_mapped_file \
+ futex_wait \
+ futex_requeue
TEST_PROGS := run.sh
diff --git a/tools/testing/selftests/futex/functional/futex_requeue.c b/tools/testing/selftests/futex/functional/futex_requeue.c
new file mode 100644
index 000000000000..51485be6eb2f
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright Collabora Ltd., 2021
+ *
+ * futex cmp requeue test by André Almeida <andrealmeid@collabora.com>
+ */
+
+#include <pthread.h>
+#include <limits.h>
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-requeue"
+#define timeout_ns 30000000
+#define WAKE_WAIT_US 10000
+
+volatile futex_t *f1;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *waiterfn(void *arg)
+{
+ struct timespec to;
+
+ to.tv_sec = 0;
+ to.tv_nsec = timeout_ns;
+
+ if (futex_wait(f1, *f1, &to, 0))
+ printf("waiter failed errno %d\n", errno);
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ pthread_t waiter[10];
+ int res, ret = RET_PASS;
+ int c, i;
+ volatile futex_t _f1 = 0;
+ volatile futex_t f2 = 0;
+
+ f1 = &_f1;
+
+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_set_plan(2);
+ ksft_print_msg("%s: Test futex_requeue\n",
+ basename(argv[0]));
+
+ /*
+ * Requeue a waiter from f1 to f2, and wake f2.
+ */
+ if (pthread_create(&waiter[0], NULL, waiterfn, NULL))
+ error("pthread_create failed\n", errno);
+
+ usleep(WAKE_WAIT_US);
+
+ info("Requeuing 1 futex from f1 to f2\n");
+ res = futex_cmp_requeue(f1, 0, &f2, 0, 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ }
+
+
+ info("Waking 1 futex at f2\n");
+ res = futex_wake(&f2, 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_requeue simple succeeds\n");
+ }
+
+
+ /*
+ * Create 10 waiters at f1. At futex_requeue, wake 3 and requeue 7.
+ * At futex_wake, wake INT_MAX (should be exactly 7).
+ */
+ for (i = 0; i < 10; i++) {
+ if (pthread_create(&waiter[i], NULL, waiterfn, NULL))
+ error("pthread_create failed\n", errno);
+ }
+
+ usleep(WAKE_WAIT_US);
+
+ info("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n");
+ res = futex_cmp_requeue(f1, 0, &f2, 3, 7, 0);
+ if (res != 10) {
+ ksft_test_result_fail("futex_requeue many returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ }
+
+ info("Waking INT_MAX futexes at f2\n");
+ res = futex_wake(&f2, INT_MAX, 0);
+ if (res != 7) {
+ ksft_test_result_fail("futex_requeue many returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_requeue many succeeds\n");
+ }
+
+ ksft_print_cnts();
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait.c b/tools/testing/selftests/futex/functional/futex_wait.c
new file mode 100644
index 000000000000..685140d9b93d
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright Collabora Ltd., 2021
+ *
+ * futex cmp requeue test by André Almeida <andrealmeid@collabora.com>
+ */
+
+#include <pthread.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-wait"
+#define timeout_ns 30000000
+#define WAKE_WAIT_US 10000
+#define SHM_PATH "futex_shm_file"
+
+void *futex;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+static void *waiterfn(void *arg)
+{
+ struct timespec to;
+ unsigned int flags = 0;
+
+ if (arg)
+ flags = *((unsigned int *) arg);
+
+ to.tv_sec = 0;
+ to.tv_nsec = timeout_ns;
+
+ if (futex_wait(futex, 0, &to, flags))
+ printf("waiter failed errno %d\n", errno);
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ int res, ret = RET_PASS, fd, c, shm_id;
+ u_int32_t f_private = 0, *shared_data;
+ unsigned int flags = FUTEX_PRIVATE_FLAG;
+ pthread_t waiter;
+ void *shm;
+
+ futex = &f_private;
+
+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_set_plan(3);
+ ksft_print_msg("%s: Test futex_wait\n", basename(argv[0]));
+
+ /* Testing a private futex */
+ info("Calling private futex_wait on futex: %p\n", futex);
+ if (pthread_create(&waiter, NULL, waiterfn, (void *) &flags))
+ error("pthread_create failed\n", errno);
+
+ usleep(WAKE_WAIT_US);
+
+ info("Calling private futex_wake on futex: %p\n", futex);
+ res = futex_wake(futex, 1, FUTEX_PRIVATE_FLAG);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake private returned: %d %s\n",
+ errno, strerror(errno));
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_wake private succeeds\n");
+ }
+
+ /* Testing an anon page shared memory */
+ shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
+ if (shm_id < 0) {
+ perror("shmget");
+ exit(1);
+ }
+
+ shared_data = shmat(shm_id, NULL, 0);
+
+ *shared_data = 0;
+ futex = shared_data;
+
+ info("Calling shared (page anon) futex_wait on futex: %p\n", futex);
+ if (pthread_create(&waiter, NULL, waiterfn, NULL))
+ error("pthread_create failed\n", errno);
+
+ usleep(WAKE_WAIT_US);
+
+ info("Calling shared (page anon) futex_wake on futex: %p\n", futex);
+ res = futex_wake(futex, 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake shared (page anon) returned: %d %s\n",
+ errno, strerror(errno));
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_wake shared (page anon) succeeds\n");
+ }
+
+
+ /* Testing a file backed shared memory */
+ fd = open(SHM_PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ perror("open");
+ exit(1);
+ }
+
+ if (ftruncate(fd, sizeof(f_private))) {
+ perror("ftruncate");
+ exit(1);
+ }
+
+ shm = mmap(NULL, sizeof(f_private), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (shm == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ memcpy(shm, &f_private, sizeof(f_private));
+
+ futex = shm;
+
+ info("Calling shared (file backed) futex_wait on futex: %p\n", futex);
+ if (pthread_create(&waiter, NULL, waiterfn, NULL))
+ error("pthread_create failed\n", errno);
+
+ usleep(WAKE_WAIT_US);
+
+ info("Calling shared (file backed) futex_wake on futex: %p\n", futex);
+ res = futex_wake(shm, 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake shared (file backed) returned: %d %s\n",
+ errno, strerror(errno));
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_wake shared (file backed) succeeds\n");
+ }
+
+ /* Freeing resources */
+ shmdt(shared_data);
+ munmap(shm, sizeof(f_private));
+ remove(SHM_PATH);
+ close(fd);
+
+ ksft_print_cnts();
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index ee55e6d389a3..1f8f6daaf1e7 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -11,21 +11,18 @@
*
* HISTORY
* 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ * 2021-Apr-26: More test cases by André Almeida <andrealmeid@collabora.com>
*
*****************************************************************************/
-#include <errno.h>
-#include <getopt.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
+#include <pthread.h>
#include "futextest.h"
#include "logging.h"
#define TEST_NAME "futex-wait-timeout"
static long timeout_ns = 100000; /* 100us default timeout */
+static futex_t futex_pi;
void usage(char *prog)
{
@@ -37,11 +34,67 @@ void usage(char *prog)
VQUIET, VCRITICAL, VINFO);
}
+/*
+ * Get a PI lock and hold it forever, so the main thread lock_pi will block
+ * and we can test the timeout
+ */
+void *get_pi_lock(void *arg)
+{
+ int ret;
+ volatile futex_t lock = 0;
+
+ ret = futex_lock_pi(&futex_pi, NULL, 0, 0);
+ if (ret != 0)
+ error("futex_lock_pi failed\n", ret);
+
+ /* Blocks forever */
+ ret = futex_wait(&lock, 0, NULL, 0);
+ error("futex_wait failed\n", ret);
+
+ return NULL;
+}
+
+/*
+ * Check if the function returned the expected error
+ */
+static void test_timeout(int res, int *ret, char *test_name, int err)
+{
+ if (!res || errno != err) {
+ ksft_test_result_fail("%s returned %d\n", test_name,
+ res < 0 ? errno : res);
+ *ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("%s succeeds\n", test_name);
+ }
+}
+
+/*
+ * Calculate absolute timeout and correct overflow
+ */
+static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to,
+ long timeout_ns)
+{
+ if (clock_gettime(clockid, to)) {
+ error("clock_gettime failed\n", errno);
+ return errno;
+ }
+
+ to->tv_nsec += timeout_ns;
+
+ if (to->tv_nsec >= 1000000000) {
+ to->tv_sec++;
+ to->tv_nsec -= 1000000000;
+ }
+
+ return 0;
+}
+
int main(int argc, char *argv[])
{
futex_t f1 = FUTEX_INITIALIZER;
- struct timespec to;
int res, ret = RET_PASS;
+ struct timespec to;
+ pthread_t thread;
int c;
while ((c = getopt(argc, argv, "cht:v:")) != -1) {
@@ -65,22 +118,63 @@ int main(int argc, char *argv[])
}
ksft_print_header();
- ksft_set_plan(1);
+ ksft_set_plan(7);
ksft_print_msg("%s: Block on a futex and wait for timeout\n",
basename(argv[0]));
ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
- /* initialize timeout */
+ pthread_create(&thread, NULL, get_pi_lock, NULL);
+
+ /* initialize relative timeout */
to.tv_sec = 0;
to.tv_nsec = timeout_ns;
- info("Calling futex_wait on f1: %u @ %p\n", f1, &f1);
- res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG);
- if (!res || errno != ETIMEDOUT) {
- fail("futex_wait returned %d\n", ret < 0 ? errno : ret);
- ret = RET_FAIL;
- }
+ res = futex_wait(&f1, f1, &to, 0);
+ test_timeout(res, &ret, "futex_wait relative", ETIMEDOUT);
+
+ /* FUTEX_WAIT_BITSET with CLOCK_REALTIME */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_bitset(&f1, f1, &to, 1, FUTEX_CLOCK_REALTIME);
+ test_timeout(res, &ret, "futex_wait_bitset realtime", ETIMEDOUT);
+
+ /* FUTEX_WAIT_BITSET with CLOCK_MONOTONIC */
+ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_bitset(&f1, f1, &to, 1, 0);
+ test_timeout(res, &ret, "futex_wait_bitset monotonic", ETIMEDOUT);
+
+ /* FUTEX_WAIT_REQUEUE_PI with CLOCK_REALTIME */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, FUTEX_CLOCK_REALTIME);
+ test_timeout(res, &ret, "futex_wait_requeue_pi realtime", ETIMEDOUT);
+
+ /* FUTEX_WAIT_REQUEUE_PI with CLOCK_MONOTONIC */
+ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0);
+ test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT);
+
+ /*
+ * FUTEX_LOCK_PI with CLOCK_REALTIME
+ * Due to historical reasons, FUTEX_LOCK_PI supports only realtime
+ * clock, but requires the caller to not set CLOCK_REALTIME flag.
+ *
+ * If you call FUTEX_LOCK_PI with a monotonic clock, it'll be
+ * interpreted as a realtime clock, and (unless you mess your machine's
+ * time or your time machine) the monotonic clock value is always
+ * smaller than realtime and the syscall will timeout immediately.
+ */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_lock_pi(&futex_pi, &to, 0, 0);
+ test_timeout(res, &ret, "futex_lock_pi realtime", ETIMEDOUT);
+
+ /* Test operations that don't support FUTEX_CLOCK_REALTIME */
+ res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME);
+ test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS);
- print_result(TEST_NAME, ret);
+ ksft_print_cnts();
return ret;
}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
index 1acb6ace1680..11a9d62290f5 100755
--- a/tools/testing/selftests/futex/functional/run.sh
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -73,3 +73,9 @@ echo
echo
./futex_wait_uninitialized_heap $COLOR
./futex_wait_private_mapped_file $COLOR
+
+echo
+./futex_wait $COLOR
+
+echo
+./futex_requeue $COLOR
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 524c857a049c..b8dbabe24ac2 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
+/aarch64/debug-exceptions
/aarch64/get-reg-list
-/aarch64/get-reg-list-sve
+/aarch64/psci_cpu_on_test
/aarch64/vgic_init
/s390x/memop
/s390x/resets
@@ -8,18 +9,22 @@
/x86_64/cr4_cpuid_sync_test
/x86_64/debug_regs
/x86_64/evmcs_test
+/x86_64/emulator_error_test
/x86_64/get_cpuid_test
/x86_64/get_msr_index_features
/x86_64/kvm_pv_test
/x86_64/hyperv_clock
/x86_64/hyperv_cpuid
+/x86_64/hyperv_features
/x86_64/mmio_warning_test
+/x86_64/mmu_role_test
/x86_64/platform_info_test
/x86_64/set_boot_cpu_id
/x86_64/set_sregs_test
/x86_64/smm_test
/x86_64/state_test
/x86_64/svm_vmcall_test
+/x86_64/svm_int_ctl_test
/x86_64/sync_regs_test
/x86_64/tsc_msrs_test
/x86_64/userspace_msr_exit_test
@@ -29,11 +34,13 @@
/x86_64/vmx_preemption_timer_test
/x86_64/vmx_set_nested_state_test
/x86_64/vmx_tsc_adjust_test
+/x86_64/vmx_nested_tsc_scaling_test
/x86_64/xapic_ipi_test
/x86_64/xen_shinfo_test
/x86_64/xen_vmcall_test
/x86_64/xss_msr_test
/x86_64/vmx_pmu_msrs_test
+/access_tracking_perf_test
/demand_paging_test
/dirty_log_test
/dirty_log_perf_test
@@ -42,5 +49,7 @@
/kvm_page_table_test
/memslot_modification_stress_test
/memslot_perf_test
+/rseq_test
/set_memory_region_test
/steal_time
+/kvm_binary_stats_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index daaee1888b12..d1774f461393 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -34,18 +34,21 @@ ifeq ($(ARCH),s390)
endif
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
-LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
-LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
+LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
+LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
+TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test
TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
+TEST_GEN_PROGS_x86_64 += x86_64/mmu_role_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
@@ -53,6 +56,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
@@ -60,6 +64,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
@@ -67,6 +72,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
+TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
@@ -75,19 +81,24 @@ TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
TEST_GEN_PROGS_x86_64 += kvm_page_table_test
TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
TEST_GEN_PROGS_x86_64 += memslot_perf_test
+TEST_GEN_PROGS_x86_64 += rseq_test
TEST_GEN_PROGS_x86_64 += set_memory_region_test
TEST_GEN_PROGS_x86_64 += steal_time
+TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
+TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
-TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
+TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test
TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
TEST_GEN_PROGS_aarch64 += kvm_page_table_test
+TEST_GEN_PROGS_aarch64 += rseq_test
TEST_GEN_PROGS_aarch64 += set_memory_region_test
TEST_GEN_PROGS_aarch64 += steal_time
+TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
TEST_GEN_PROGS_s390x = s390x/memop
TEST_GEN_PROGS_s390x += s390x/resets
@@ -96,7 +107,9 @@ TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
TEST_GEN_PROGS_s390x += kvm_page_table_test
+TEST_GEN_PROGS_s390x += rseq_test
TEST_GEN_PROGS_s390x += set_memory_region_test
+TEST_GEN_PROGS_s390x += kvm_binary_stats_test
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
new file mode 100644
index 000000000000..e5e6c92b60da
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define VCPU_ID 0
+
+#define MDSCR_KDE (1 << 13)
+#define MDSCR_MDE (1 << 15)
+#define MDSCR_SS (1 << 0)
+
+#define DBGBCR_LEN8 (0xff << 5)
+#define DBGBCR_EXEC (0x0 << 3)
+#define DBGBCR_EL1 (0x1 << 1)
+#define DBGBCR_E (0x1 << 0)
+
+#define DBGWCR_LEN8 (0xff << 5)
+#define DBGWCR_RD (0x1 << 3)
+#define DBGWCR_WR (0x2 << 3)
+#define DBGWCR_EL1 (0x1 << 1)
+#define DBGWCR_E (0x1 << 0)
+
+#define SPSR_D (1 << 9)
+#define SPSR_SS (1 << 21)
+
+extern unsigned char sw_bp, hw_bp, bp_svc, bp_brk, hw_wp, ss_start;
+static volatile uint64_t sw_bp_addr, hw_bp_addr;
+static volatile uint64_t wp_addr, wp_data_addr;
+static volatile uint64_t svc_addr;
+static volatile uint64_t ss_addr[4], ss_idx;
+#define PC(v) ((uint64_t)&(v))
+
+static void reset_debug_state(void)
+{
+ asm volatile("msr daifset, #8");
+
+ write_sysreg(osdlr_el1, 0);
+ write_sysreg(oslar_el1, 0);
+ isb();
+
+ write_sysreg(mdscr_el1, 0);
+ /* This test only uses the first bp and wp slot. */
+ write_sysreg(dbgbvr0_el1, 0);
+ write_sysreg(dbgbcr0_el1, 0);
+ write_sysreg(dbgwcr0_el1, 0);
+ write_sysreg(dbgwvr0_el1, 0);
+ isb();
+}
+
+static void install_wp(uint64_t addr)
+{
+ uint32_t wcr;
+ uint32_t mdscr;
+
+ wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
+ write_sysreg(dbgwcr0_el1, wcr);
+ write_sysreg(dbgwvr0_el1, addr);
+ isb();
+
+ asm volatile("msr daifclr, #8");
+
+ mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+ write_sysreg(mdscr_el1, mdscr);
+ isb();
+}
+
+static void install_hw_bp(uint64_t addr)
+{
+ uint32_t bcr;
+ uint32_t mdscr;
+
+ bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
+ write_sysreg(dbgbcr0_el1, bcr);
+ write_sysreg(dbgbvr0_el1, addr);
+ isb();
+
+ asm volatile("msr daifclr, #8");
+
+ mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+ write_sysreg(mdscr_el1, mdscr);
+ isb();
+}
+
+static void install_ss(void)
+{
+ uint32_t mdscr;
+
+ asm volatile("msr daifclr, #8");
+
+ mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS;
+ write_sysreg(mdscr_el1, mdscr);
+ isb();
+}
+
+static volatile char write_data;
+
+static void guest_code(void)
+{
+ GUEST_SYNC(0);
+
+ /* Software-breakpoint */
+ asm volatile("sw_bp: brk #0");
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
+
+ GUEST_SYNC(1);
+
+ /* Hardware-breakpoint */
+ reset_debug_state();
+ install_hw_bp(PC(hw_bp));
+ asm volatile("hw_bp: nop");
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
+
+ GUEST_SYNC(2);
+
+ /* Hardware-breakpoint + svc */
+ reset_debug_state();
+ install_hw_bp(PC(bp_svc));
+ asm volatile("bp_svc: svc #0");
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
+ GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
+
+ GUEST_SYNC(3);
+
+ /* Hardware-breakpoint + software-breakpoint */
+ reset_debug_state();
+ install_hw_bp(PC(bp_brk));
+ asm volatile("bp_brk: brk #0");
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
+
+ GUEST_SYNC(4);
+
+ /* Watchpoint */
+ reset_debug_state();
+ install_wp(PC(write_data));
+ write_data = 'x';
+ GUEST_ASSERT_EQ(write_data, 'x');
+ GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
+ GUEST_SYNC(5);
+
+ /* Single-step */
+ reset_debug_state();
+ install_ss();
+ ss_idx = 0;
+ asm volatile("ss_start:\n"
+ "mrs x0, esr_el1\n"
+ "add x0, x0, #1\n"
+ "msr daifset, #8\n"
+ : : : "x0");
+ GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start));
+ GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
+ GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
+
+ GUEST_DONE();
+}
+
+static void guest_sw_bp_handler(struct ex_regs *regs)
+{
+ sw_bp_addr = regs->pc;
+ regs->pc += 4;
+}
+
+static void guest_hw_bp_handler(struct ex_regs *regs)
+{
+ hw_bp_addr = regs->pc;
+ regs->pstate |= SPSR_D;
+}
+
+static void guest_wp_handler(struct ex_regs *regs)
+{
+ wp_data_addr = read_sysreg(far_el1);
+ wp_addr = regs->pc;
+ regs->pstate |= SPSR_D;
+}
+
+static void guest_ss_handler(struct ex_regs *regs)
+{
+ GUEST_ASSERT_1(ss_idx < 4, ss_idx);
+ ss_addr[ss_idx++] = regs->pc;
+ regs->pstate |= SPSR_SS;
+}
+
+static void guest_svc_handler(struct ex_regs *regs)
+{
+ svc_addr = regs->pc;
+}
+
+static int debug_version(struct kvm_vm *vm)
+{
+ uint64_t id_aa64dfr0;
+
+ get_reg(vm, VCPU_ID, ARM64_SYS_REG(ID_AA64DFR0_EL1), &id_aa64dfr0);
+ return id_aa64dfr0 & 0xf;
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage;
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ ucall_init(vm, NULL);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+ if (debug_version(vm) < 6) {
+ print_skip("Armv8 debug architecture not supported.");
+ kvm_vm_free(vm);
+ exit(KSFT_SKIP);
+ }
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_BRK_INS, guest_sw_bp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_HW_BP_CURRENT, guest_hw_bp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_WP_CURRENT, guest_wp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_SSTEP_CURRENT, guest_ss_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_SVC64, guest_svc_handler);
+
+ for (stage = 0; stage < 7; stage++) {
+ vcpu_run(vm, VCPU_ID);
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Stage %d: Unexpected sync ucall, got %lx",
+ stage, (ulong)uc.args[1]);
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx",
+ (const char *)uc.args[0],
+ __FILE__, uc.args[1], uc.args[2], uc.args[3]);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
deleted file mode 100644
index efba76682b4b..000000000000
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
+++ /dev/null
@@ -1,3 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define REG_LIST_SVE
-#include "get-reg-list.c"
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index 486932164cf2..cc898181faab 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -27,17 +27,37 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
#include "kvm_util.h"
#include "test_util.h"
#include "processor.h"
-#ifdef REG_LIST_SVE
-#define reg_list_sve() (true)
-#else
-#define reg_list_sve() (false)
-#endif
+static struct kvm_reg_list *reg_list;
+static __u64 *blessed_reg, blessed_n;
-#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+struct reg_sublist {
+ const char *name;
+ long capability;
+ int feature;
+ bool finalize;
+ __u64 *regs;
+ __u64 regs_n;
+ __u64 *rejects_set;
+ __u64 rejects_set_n;
+};
+
+struct vcpu_config {
+ char *name;
+ struct reg_sublist sublists[];
+};
+
+static struct vcpu_config *vcpu_configs[];
+static int vcpu_configs_n;
+
+#define for_each_sublist(c, s) \
+ for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
#define for_each_reg(i) \
for ((i) = 0; (i) < reg_list->n; ++(i))
@@ -54,12 +74,41 @@
for_each_reg_filtered(i) \
if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+static const char *config_name(struct vcpu_config *c)
+{
+ struct reg_sublist *s;
+ int len = 0;
-static struct kvm_reg_list *reg_list;
+ if (c->name)
+ return c->name;
-static __u64 base_regs[], vregs[], sve_regs[], rejects_set[];
-static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n;
-static __u64 *blessed_reg, blessed_n;
+ for_each_sublist(c, s)
+ len += strlen(s->name) + 1;
+
+ c->name = malloc(len);
+
+ len = 0;
+ for_each_sublist(c, s) {
+ if (!strcmp(s->name, "base"))
+ continue;
+ strcat(c->name + len, s->name);
+ len += strlen(s->name) + 1;
+ c->name[len - 1] = '+';
+ }
+ c->name[len - 1] = '\0';
+
+ return c->name;
+}
+
+static bool has_cap(struct vcpu_config *c, long capability)
+{
+ struct reg_sublist *s;
+
+ for_each_sublist(c, s)
+ if (s->capability == capability)
+ return true;
+ return false;
+}
static bool filter_reg(__u64 reg)
{
@@ -96,11 +145,13 @@ static const char *str_with_index(const char *template, __u64 index)
return (const char *)str;
}
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+
#define CORE_REGS_XX_NR_WORDS 2
#define CORE_SPSR_XX_NR_WORDS 2
#define CORE_FPREGS_XX_NR_WORDS 4
-static const char *core_id_to_str(__u64 id)
+static const char *core_id_to_str(struct vcpu_config *c, __u64 id)
{
__u64 core_off = id & ~REG_MASK, idx;
@@ -111,7 +162,7 @@ static const char *core_id_to_str(__u64 id)
case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
KVM_REG_ARM_CORE_REG(regs.regs[30]):
idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
- TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx);
+ TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", config_name(c), idx);
return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx);
case KVM_REG_ARM_CORE_REG(regs.sp):
return "KVM_REG_ARM_CORE_REG(regs.sp)";
@@ -126,12 +177,12 @@ static const char *core_id_to_str(__u64 id)
case KVM_REG_ARM_CORE_REG(spsr[0]) ...
KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
- TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx);
+ TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", config_name(c), idx);
return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx);
case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
- TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx);
+ TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", config_name(c), idx);
return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx);
case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
@@ -139,11 +190,11 @@ static const char *core_id_to_str(__u64 id)
return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
}
- TEST_FAIL("Unknown core reg id: 0x%llx", id);
+ TEST_FAIL("%s: Unknown core reg id: 0x%llx", config_name(c), id);
return NULL;
}
-static const char *sve_id_to_str(__u64 id)
+static const char *sve_id_to_str(struct vcpu_config *c, __u64 id)
{
__u64 sve_off, n, i;
@@ -153,37 +204,37 @@ static const char *sve_id_to_str(__u64 id)
sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
- TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id);
+ TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", config_name(c), id);
switch (sve_off) {
case KVM_REG_ARM64_SVE_ZREG_BASE ...
KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
- "Unexpected bits set in SVE ZREG id: 0x%llx", id);
+ "%s: Unexpected bits set in SVE ZREG id: 0x%llx", config_name(c), id);
return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n);
case KVM_REG_ARM64_SVE_PREG_BASE ...
KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
- "Unexpected bits set in SVE PREG id: 0x%llx", id);
+ "%s: Unexpected bits set in SVE PREG id: 0x%llx", config_name(c), id);
return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n);
case KVM_REG_ARM64_SVE_FFR_BASE:
TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
- "Unexpected bits set in SVE FFR id: 0x%llx", id);
+ "%s: Unexpected bits set in SVE FFR id: 0x%llx", config_name(c), id);
return "KVM_REG_ARM64_SVE_FFR(0)";
}
return NULL;
}
-static void print_reg(__u64 id)
+static void print_reg(struct vcpu_config *c, __u64 id)
{
unsigned op0, op1, crn, crm, op2;
const char *reg_size = NULL;
TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
- "KVM_REG_ARM64 missing in reg id: 0x%llx", id);
+ "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", config_name(c), id);
switch (id & KVM_REG_SIZE_MASK) {
case KVM_REG_SIZE_U8:
@@ -214,17 +265,17 @@ static void print_reg(__u64 id)
reg_size = "KVM_REG_SIZE_U2048";
break;
default:
- TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx",
- (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+ TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
+ config_name(c), (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
}
switch (id & KVM_REG_ARM_COPROC_MASK) {
case KVM_REG_ARM_CORE:
- printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id));
+ printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(c, id));
break;
case KVM_REG_ARM_DEMUX:
TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
- "Unexpected bits set in DEMUX reg id: 0x%llx", id);
+ "%s: Unexpected bits set in DEMUX reg id: 0x%llx", config_name(c), id);
printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
break;
@@ -235,23 +286,23 @@ static void print_reg(__u64 id)
crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
- "Unexpected bits set in SYSREG reg id: 0x%llx", id);
+ "%s: Unexpected bits set in SYSREG reg id: 0x%llx", config_name(c), id);
printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
break;
case KVM_REG_ARM_FW:
TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
- "Unexpected bits set in FW reg id: 0x%llx", id);
+ "%s: Unexpected bits set in FW reg id: 0x%llx", config_name(c), id);
printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
break;
case KVM_REG_ARM64_SVE:
- if (reg_list_sve())
- printf("\t%s,\n", sve_id_to_str(id));
+ if (has_cap(c, KVM_CAP_ARM_SVE))
+ printf("\t%s,\n", sve_id_to_str(c, id));
else
- TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id);
+ TEST_FAIL("%s: KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", config_name(c), id);
break;
default:
- TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx",
- (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
+ TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
+ config_name(c), (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
}
}
@@ -312,56 +363,58 @@ static void core_reg_fixup(void)
reg_list = tmp;
}
-static void prepare_vcpu_init(struct kvm_vcpu_init *init)
+static void prepare_vcpu_init(struct vcpu_config *c, struct kvm_vcpu_init *init)
{
- if (reg_list_sve())
- init->features[0] |= 1 << KVM_ARM_VCPU_SVE;
+ struct reg_sublist *s;
+
+ for_each_sublist(c, s)
+ if (s->capability)
+ init->features[s->feature / 32] |= 1 << (s->feature % 32);
}
-static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid, struct vcpu_config *c)
{
+ struct reg_sublist *s;
int feature;
- if (reg_list_sve()) {
- feature = KVM_ARM_VCPU_SVE;
- vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
+ for_each_sublist(c, s) {
+ if (s->finalize) {
+ feature = s->feature;
+ vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
+ }
}
}
-static void check_supported(void)
+static void check_supported(struct vcpu_config *c)
{
- if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) {
- fprintf(stderr, "SVE not available, skipping tests\n");
- exit(KSFT_SKIP);
+ struct reg_sublist *s;
+
+ for_each_sublist(c, s) {
+ if (s->capability && !kvm_check_cap(s->capability)) {
+ fprintf(stderr, "%s: %s not available, skipping tests\n", config_name(c), s->name);
+ exit(KSFT_SKIP);
+ }
}
}
-int main(int ac, char **av)
+static bool print_list;
+static bool print_filtered;
+static bool fixup_core_regs;
+
+static void run_test(struct vcpu_config *c)
{
struct kvm_vcpu_init init = { .target = -1, };
- int new_regs = 0, missing_regs = 0, i;
+ int new_regs = 0, missing_regs = 0, i, n;
int failed_get = 0, failed_set = 0, failed_reject = 0;
- bool print_list = false, print_filtered = false, fixup_core_regs = false;
struct kvm_vm *vm;
- __u64 *vec_regs;
+ struct reg_sublist *s;
- check_supported();
-
- for (i = 1; i < ac; ++i) {
- if (strcmp(av[i], "--core-reg-fixup") == 0)
- fixup_core_regs = true;
- else if (strcmp(av[i], "--list") == 0)
- print_list = true;
- else if (strcmp(av[i], "--list-filtered") == 0)
- print_filtered = true;
- else
- TEST_FAIL("Unknown option: %s\n", av[i]);
- }
+ check_supported(c);
vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
- prepare_vcpu_init(&init);
+ prepare_vcpu_init(c, &init);
aarch64_vcpu_add_default(vm, 0, &init, NULL);
- finalize_vcpu(vm, 0);
+ finalize_vcpu(vm, 0, c);
reg_list = vcpu_get_reg_list(vm, 0);
@@ -374,10 +427,10 @@ int main(int ac, char **av)
__u64 id = reg_list->reg[i];
if ((print_list && !filter_reg(id)) ||
(print_filtered && filter_reg(id)))
- print_reg(id);
+ print_reg(c, id);
}
putchar('\n');
- return 0;
+ return;
}
/*
@@ -396,50 +449,52 @@ int main(int ac, char **av)
.id = reg_list->reg[i],
.addr = (__u64)&addr,
};
+ bool reject_reg = false;
int ret;
ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, &reg);
if (ret) {
- puts("Failed to get ");
- print_reg(reg.id);
+ printf("%s: Failed to get ", config_name(c));
+ print_reg(c, reg.id);
putchar('\n');
++failed_get;
}
/* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */
- if (find_reg(rejects_set, rejects_set_n, reg.id)) {
- ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
- if (ret != -1 || errno != EPERM) {
- printf("Failed to reject (ret=%d, errno=%d) ", ret, errno);
- print_reg(reg.id);
- putchar('\n');
- ++failed_reject;
+ for_each_sublist(c, s) {
+ if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) {
+ reject_reg = true;
+ ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+ if (ret != -1 || errno != EPERM) {
+ printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno);
+ print_reg(c, reg.id);
+ putchar('\n');
+ ++failed_reject;
+ }
+ break;
}
- continue;
}
- ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
- if (ret) {
- puts("Failed to set ");
- print_reg(reg.id);
- putchar('\n');
- ++failed_set;
+ if (!reject_reg) {
+ ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+ if (ret) {
+ printf("%s: Failed to set ", config_name(c));
+ print_reg(c, reg.id);
+ putchar('\n');
+ ++failed_set;
+ }
}
}
- if (reg_list_sve()) {
- blessed_n = base_regs_n + sve_regs_n;
- vec_regs = sve_regs;
- } else {
- blessed_n = base_regs_n + vregs_n;
- vec_regs = vregs;
- }
-
+ for_each_sublist(c, s)
+ blessed_n += s->regs_n;
blessed_reg = calloc(blessed_n, sizeof(__u64));
- for (i = 0; i < base_regs_n; ++i)
- blessed_reg[i] = base_regs[i];
- for (i = 0; i < blessed_n - base_regs_n; ++i)
- blessed_reg[base_regs_n + i] = vec_regs[i];
+
+ n = 0;
+ for_each_sublist(c, s) {
+ for (i = 0; i < s->regs_n; ++i)
+ blessed_reg[n++] = s->regs[i];
+ }
for_each_new_reg(i)
++new_regs;
@@ -448,40 +503,141 @@ int main(int ac, char **av)
++missing_regs;
if (new_regs || missing_regs) {
- printf("Number blessed registers: %5lld\n", blessed_n);
- printf("Number registers: %5lld\n", reg_list->n);
+ printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
+ printf("%s: Number registers: %5lld\n", config_name(c), reg_list->n);
}
if (new_regs) {
- printf("\nThere are %d new registers.\n"
+ printf("\n%s: There are %d new registers.\n"
"Consider adding them to the blessed reg "
- "list with the following lines:\n\n", new_regs);
+ "list with the following lines:\n\n", config_name(c), new_regs);
for_each_new_reg(i)
- print_reg(reg_list->reg[i]);
+ print_reg(c, reg_list->reg[i]);
putchar('\n');
}
if (missing_regs) {
- printf("\nThere are %d missing registers.\n"
- "The following lines are missing registers:\n\n", missing_regs);
+ printf("\n%s: There are %d missing registers.\n"
+ "The following lines are missing registers:\n\n", config_name(c), missing_regs);
for_each_missing_reg(i)
- print_reg(blessed_reg[i]);
+ print_reg(c, blessed_reg[i]);
putchar('\n');
}
TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
- "There are %d missing registers; "
+ "%s: There are %d missing registers; "
"%d registers failed get; %d registers failed set; %d registers failed reject",
- missing_regs, failed_get, failed_set, failed_reject);
+ config_name(c), missing_regs, failed_get, failed_set, failed_reject);
- return 0;
+ pr_info("%s: PASS\n", config_name(c));
+ blessed_n = 0;
+ free(blessed_reg);
+ free(reg_list);
+ kvm_vm_free(vm);
+}
+
+static void help(void)
+{
+ struct vcpu_config *c;
+ int i;
+
+ printf(
+ "\n"
+ "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered] [--core-reg-fixup]\n\n"
+ " --config=<selection> Used to select a specific vcpu configuration for the test/listing\n"
+ " '<selection>' may be\n");
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ printf(
+ " '%s'\n", config_name(c));
+ }
+
+ printf(
+ "\n"
+ " --list Print the register list rather than test it (requires --config)\n"
+ " --list-filtered Print registers that would normally be filtered out (requires --config)\n"
+ " --core-reg-fixup Needed when running on old kernels with broken core reg listings\n"
+ "\n"
+ );
+}
+
+static struct vcpu_config *parse_config(const char *config)
+{
+ struct vcpu_config *c;
+ int i;
+
+ if (config[8] != '=')
+ help(), exit(1);
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ if (strcmp(config_name(c), &config[9]) == 0)
+ break;
+ }
+
+ if (i == vcpu_configs_n)
+ help(), exit(1);
+
+ return c;
+}
+
+int main(int ac, char **av)
+{
+ struct vcpu_config *c, *sel = NULL;
+ int i, ret = 0;
+ pid_t pid;
+
+ for (i = 1; i < ac; ++i) {
+ if (strcmp(av[i], "--core-reg-fixup") == 0)
+ fixup_core_regs = true;
+ else if (strncmp(av[i], "--config", 8) == 0)
+ sel = parse_config(av[i]);
+ else if (strcmp(av[i], "--list") == 0)
+ print_list = true;
+ else if (strcmp(av[i], "--list-filtered") == 0)
+ print_filtered = true;
+ else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0)
+ help(), exit(0);
+ else
+ help(), exit(1);
+ }
+
+ if (print_list || print_filtered) {
+ /*
+ * We only want to print the register list of a single config.
+ */
+ if (!sel)
+ help(), exit(1);
+ }
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ if (sel && c != sel)
+ continue;
+
+ pid = fork();
+
+ if (!pid) {
+ run_test(c);
+ exit(0);
+ } else {
+ int wstatus;
+ pid_t wpid = wait(&wstatus);
+ TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return");
+ if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP)
+ ret = KSFT_FAIL;
+ }
+ }
+
+ return ret;
}
/*
* The current blessed list was primed with the output of kernel version
* v4.15 with --core-reg-fixup and then later updated with new registers.
*
- * The blessed list is up to date with kernel version v5.10-rc5
+ * The blessed list is up to date with kernel version v5.13-rc3
*/
static __u64 base_regs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
@@ -673,8 +829,6 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */
ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */
ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */
- ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */
- ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */
ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */
ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */
ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */
@@ -683,6 +837,16 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */
ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */
ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */
+ ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
+ ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
+ ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
+ ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
+ ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */
+};
+
+static __u64 pmu_regs[] = {
+ ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */
+ ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */
ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */
ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */
ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */
@@ -692,8 +856,6 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */
ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */
ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */
- ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
- ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
ARM64_SYS_REG(3, 3, 14, 8, 0),
ARM64_SYS_REG(3, 3, 14, 8, 1),
ARM64_SYS_REG(3, 3, 14, 8, 2),
@@ -757,11 +919,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 3, 14, 15, 5),
ARM64_SYS_REG(3, 3, 14, 15, 6),
ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */
- ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
- ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
- ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */
};
-static __u64 base_regs_n = ARRAY_SIZE(base_regs);
static __u64 vregs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
@@ -797,7 +955,6 @@ static __u64 vregs[] = {
KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
};
-static __u64 vregs_n = ARRAY_SIZE(vregs);
static __u64 sve_regs[] = {
KVM_REG_ARM64_SVE_VLS,
@@ -852,11 +1009,58 @@ static __u64 sve_regs[] = {
KVM_REG_ARM64_SVE_FFR(0),
ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */
};
-static __u64 sve_regs_n = ARRAY_SIZE(sve_regs);
-static __u64 rejects_set[] = {
-#ifdef REG_LIST_SVE
+static __u64 sve_rejects_set[] = {
KVM_REG_ARM64_SVE_VLS,
-#endif
};
-static __u64 rejects_set_n = ARRAY_SIZE(rejects_set);
+
+#define BASE_SUBLIST \
+ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
+#define VREGS_SUBLIST \
+ { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
+#define PMU_SUBLIST \
+ { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \
+ .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
+#define SVE_SUBLIST \
+ { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
+ .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
+ .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
+
+static struct vcpu_config vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_config vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_config sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_config sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_config *vcpu_configs[] = {
+ &vregs_config,
+ &vregs_pmu_config,
+ &sve_config,
+ &sve_pmu_config,
+};
+static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c b/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
new file mode 100644
index 000000000000..018c269990e1
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * psci_cpu_on_test - Test that the observable state of a vCPU targeted by the
+ * CPU_ON PSCI call matches what the caller requested.
+ *
+ * Copyright (c) 2021 Google LLC.
+ *
+ * This is a regression test for a race between KVM servicing the PSCI call and
+ * userspace reading the vCPUs registers.
+ */
+
+#define _GNU_SOURCE
+
+#include <linux/psci.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define VCPU_ID_SOURCE 0
+#define VCPU_ID_TARGET 1
+
+#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
+#define CPU_ON_CONTEXT_ID 0xdeadc0deul
+
+static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
+ uint64_t context_id)
+{
+ register uint64_t x0 asm("x0") = PSCI_0_2_FN64_CPU_ON;
+ register uint64_t x1 asm("x1") = target_cpu;
+ register uint64_t x2 asm("x2") = entry_addr;
+ register uint64_t x3 asm("x3") = context_id;
+
+ asm("hvc #0"
+ : "=r"(x0)
+ : "r"(x0), "r"(x1), "r"(x2), "r"(x3)
+ : "memory");
+
+ return x0;
+}
+
+static uint64_t psci_affinity_info(uint64_t target_affinity,
+ uint64_t lowest_affinity_level)
+{
+ register uint64_t x0 asm("x0") = PSCI_0_2_FN64_AFFINITY_INFO;
+ register uint64_t x1 asm("x1") = target_affinity;
+ register uint64_t x2 asm("x2") = lowest_affinity_level;
+
+ asm("hvc #0"
+ : "=r"(x0)
+ : "r"(x0), "r"(x1), "r"(x2)
+ : "memory");
+
+ return x0;
+}
+
+static void guest_main(uint64_t target_cpu)
+{
+ GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
+ uint64_t target_state;
+
+ do {
+ target_state = psci_affinity_info(target_cpu, 0);
+
+ GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
+ (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
+ } while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
+
+ GUEST_DONE();
+}
+
+int main(void)
+{
+ uint64_t target_mpidr, obs_pc, obs_x0;
+ struct kvm_vcpu_init init;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ kvm_vm_elf_load(vm, program_invocation_name);
+ ucall_init(vm, NULL);
+
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+ init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+ aarch64_vcpu_add_default(vm, VCPU_ID_SOURCE, &init, guest_main);
+
+ /*
+ * make sure the target is already off when executing the test.
+ */
+ init.features[0] |= (1 << KVM_ARM_VCPU_POWER_OFF);
+ aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_main);
+
+ get_reg(vm, VCPU_ID_TARGET, ARM64_SYS_REG(MPIDR_EL1), &target_mpidr);
+ vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK);
+ vcpu_run(vm, VCPU_ID_SOURCE);
+
+ switch (get_ucall(vm, VCPU_ID_SOURCE, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__,
+ uc.args[1]);
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
+ }
+
+ get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.pc), &obs_pc);
+ get_reg(vm, VCPU_ID_TARGET, ARM64_CORE_REG(regs.regs[0]), &obs_x0);
+
+ TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
+ "unexpected target cpu pc: %lx (expected: %lx)",
+ obs_pc, CPU_ON_ENTRY_ADDR);
+ TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID,
+ "unexpected target context id: %lx (expected: %lx)",
+ obs_x0, CPU_ON_CONTEXT_ID);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
new file mode 100644
index 000000000000..5d95113c7b7c
--- /dev/null
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -0,0 +1,425 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * access_tracking_perf_test
+ *
+ * Copyright (C) 2021, Google, Inc.
+ *
+ * This test measures the performance effects of KVM's access tracking.
+ * Access tracking is driven by the MMU notifiers test_young, clear_young, and
+ * clear_flush_young. These notifiers do not have a direct userspace API,
+ * however the clear_young notifier can be triggered by marking a pages as idle
+ * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to
+ * enable access tracking on guest memory.
+ *
+ * To measure performance this test runs a VM with a configurable number of
+ * vCPUs that each touch every page in disjoint regions of memory. Performance
+ * is measured in the time it takes all vCPUs to finish touching their
+ * predefined region.
+ *
+ * Note that a deterministic correctness test of access tracking is not possible
+ * by using page_idle as it exists today. This is for a few reasons:
+ *
+ * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This
+ * means subsequent guest accesses are not guaranteed to see page table
+ * updates made by KVM until some time in the future.
+ *
+ * 2. page_idle only operates on LRU pages. Newly allocated pages are not
+ * immediately allocated to LRU lists. Instead they are held in a "pagevec",
+ * which is drained to LRU lists some time in the future. There is no
+ * userspace API to force this drain to occur.
+ *
+ * These limitations are worked around in this test by using a large enough
+ * region of memory for each vCPU such that the number of translations cached in
+ * the TLB and the number of pages held in pagevecs are a small fraction of the
+ * overall workload. And if either of those conditions are not true this test
+ * will fail rather than silently passing.
+ */
+#include <inttypes.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "perf_test_util.h"
+#include "guest_modes.h"
+
+/* Global variable used to synchronize all of the vCPU threads. */
+static int iteration = -1;
+
+/* Defines what vCPU threads should do during a given iteration. */
+static enum {
+ /* Run the vCPU to access all its memory. */
+ ITERATION_ACCESS_MEMORY,
+ /* Mark the vCPU's memory idle in page_idle. */
+ ITERATION_MARK_IDLE,
+} iteration_work;
+
+/* Set to true when vCPU threads should exit. */
+static bool done;
+
+/* The iteration that was last completed by each vCPU. */
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+/* Whether to overlap the regions of memory vCPUs access. */
+static bool overlap_memory_access;
+
+struct test_params {
+ /* The backing source for the region of memory. */
+ enum vm_mem_backing_src_type backing_src;
+
+ /* The amount of memory to allocate for each vCPU. */
+ uint64_t vcpu_memory_bytes;
+
+ /* The number of vCPUs to create in the VM. */
+ int vcpus;
+};
+
+static uint64_t pread_uint64(int fd, const char *filename, uint64_t index)
+{
+ uint64_t value;
+ off_t offset = index * sizeof(value);
+
+ TEST_ASSERT(pread(fd, &value, sizeof(value), offset) == sizeof(value),
+ "pread from %s offset 0x%" PRIx64 " failed!",
+ filename, offset);
+
+ return value;
+
+}
+
+#define PAGEMAP_PRESENT (1ULL << 63)
+#define PAGEMAP_PFN_MASK ((1ULL << 55) - 1)
+
+static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva)
+{
+ uint64_t hva = (uint64_t) addr_gva2hva(vm, gva);
+ uint64_t entry;
+ uint64_t pfn;
+
+ entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize());
+ if (!(entry & PAGEMAP_PRESENT))
+ return 0;
+
+ pfn = entry & PAGEMAP_PFN_MASK;
+ if (!pfn) {
+ print_skip("Looking up PFNs requires CAP_SYS_ADMIN");
+ exit(KSFT_SKIP);
+ }
+
+ return pfn;
+}
+
+static bool is_page_idle(int page_idle_fd, uint64_t pfn)
+{
+ uint64_t bits = pread_uint64(page_idle_fd, "page_idle", pfn / 64);
+
+ return !!((bits >> (pfn % 64)) & 1);
+}
+
+static void mark_page_idle(int page_idle_fd, uint64_t pfn)
+{
+ uint64_t bits = 1ULL << (pfn % 64);
+
+ TEST_ASSERT(pwrite(page_idle_fd, &bits, 8, 8 * (pfn / 64)) == 8,
+ "Set page_idle bits for PFN 0x%" PRIx64, pfn);
+}
+
+static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id)
+{
+ uint64_t base_gva = perf_test_args.vcpu_args[vcpu_id].gva;
+ uint64_t pages = perf_test_args.vcpu_args[vcpu_id].pages;
+ uint64_t page;
+ uint64_t still_idle = 0;
+ uint64_t no_pfn = 0;
+ int page_idle_fd;
+ int pagemap_fd;
+
+ /* If vCPUs are using an overlapping region, let vCPU 0 mark it idle. */
+ if (overlap_memory_access && vcpu_id)
+ return;
+
+ page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+ TEST_ASSERT(page_idle_fd > 0, "Failed to open page_idle.");
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap.");
+
+ for (page = 0; page < pages; page++) {
+ uint64_t gva = base_gva + page * perf_test_args.guest_page_size;
+ uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva);
+
+ if (!pfn) {
+ no_pfn++;
+ continue;
+ }
+
+ if (is_page_idle(page_idle_fd, pfn)) {
+ still_idle++;
+ continue;
+ }
+
+ mark_page_idle(page_idle_fd, pfn);
+ }
+
+ /*
+ * Assumption: Less than 1% of pages are going to be swapped out from
+ * under us during this test.
+ */
+ TEST_ASSERT(no_pfn < pages / 100,
+ "vCPU %d: No PFN for %" PRIu64 " out of %" PRIu64 " pages.",
+ vcpu_id, no_pfn, pages);
+
+ /*
+ * Test that at least 90% of memory has been marked idle (the rest might
+ * not be marked idle because the pages have not yet made it to an LRU
+ * list or the translations are still cached in the TLB). 90% is
+ * arbitrary; high enough that we ensure most memory access went through
+ * access tracking but low enough as to not make the test too brittle
+ * over time and across architectures.
+ */
+ TEST_ASSERT(still_idle < pages / 10,
+ "vCPU%d: Too many pages still idle (%"PRIu64 " out of %"
+ PRIu64 ").\n",
+ vcpu_id, still_idle, pages);
+
+ close(page_idle_fd);
+ close(pagemap_fd);
+}
+
+static void assert_ucall(struct kvm_vm *vm, uint32_t vcpu_id,
+ uint64_t expected_ucall)
+{
+ struct ucall uc;
+ uint64_t actual_ucall = get_ucall(vm, vcpu_id, &uc);
+
+ TEST_ASSERT(expected_ucall == actual_ucall,
+ "Guest exited unexpectedly (expected ucall %" PRIu64
+ ", got %" PRIu64 ")",
+ expected_ucall, actual_ucall);
+}
+
+static bool spin_wait_for_next_iteration(int *current_iteration)
+{
+ int last_iteration = *current_iteration;
+
+ do {
+ if (READ_ONCE(done))
+ return false;
+
+ *current_iteration = READ_ONCE(iteration);
+ } while (last_iteration == *current_iteration);
+
+ return true;
+}
+
+static void *vcpu_thread_main(void *arg)
+{
+ struct perf_test_vcpu_args *vcpu_args = arg;
+ struct kvm_vm *vm = perf_test_args.vm;
+ int vcpu_id = vcpu_args->vcpu_id;
+ int current_iteration = -1;
+
+ while (spin_wait_for_next_iteration(&current_iteration)) {
+ switch (READ_ONCE(iteration_work)) {
+ case ITERATION_ACCESS_MEMORY:
+ vcpu_run(vm, vcpu_id);
+ assert_ucall(vm, vcpu_id, UCALL_SYNC);
+ break;
+ case ITERATION_MARK_IDLE:
+ mark_vcpu_memory_idle(vm, vcpu_id);
+ break;
+ };
+
+ vcpu_last_completed_iteration[vcpu_id] = current_iteration;
+ }
+
+ return NULL;
+}
+
+static void spin_wait_for_vcpu(int vcpu_id, int target_iteration)
+{
+ while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) !=
+ target_iteration) {
+ continue;
+ }
+}
+
+/* The type of memory accesses to perform in the VM. */
+enum access_type {
+ ACCESS_READ,
+ ACCESS_WRITE,
+};
+
+static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description)
+{
+ struct timespec ts_start;
+ struct timespec ts_elapsed;
+ int next_iteration;
+ int vcpu_id;
+
+ /* Kick off the vCPUs by incrementing iteration. */
+ next_iteration = ++iteration;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+ /* Wait for all vCPUs to finish the iteration. */
+ for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++)
+ spin_wait_for_vcpu(vcpu_id, next_iteration);
+
+ ts_elapsed = timespec_elapsed(ts_start);
+ pr_info("%-30s: %ld.%09lds\n",
+ description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec);
+}
+
+static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access,
+ const char *description)
+{
+ perf_test_args.wr_fract = (access == ACCESS_READ) ? INT_MAX : 1;
+ sync_global_to_guest(vm, perf_test_args);
+ iteration_work = ITERATION_ACCESS_MEMORY;
+ run_iteration(vm, vcpus, description);
+}
+
+static void mark_memory_idle(struct kvm_vm *vm, int vcpus)
+{
+ /*
+ * Even though this parallelizes the work across vCPUs, this is still a
+ * very slow operation because page_idle forces the test to mark one pfn
+ * at a time and the clear_young notifier serializes on the KVM MMU
+ * lock.
+ */
+ pr_debug("Marking VM memory idle (slow)...\n");
+ iteration_work = ITERATION_MARK_IDLE;
+ run_iteration(vm, vcpus, "Mark memory idle");
+}
+
+static pthread_t *create_vcpu_threads(int vcpus)
+{
+ pthread_t *vcpu_threads;
+ int i;
+
+ vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0]));
+ TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads.");
+
+ for (i = 0; i < vcpus; i++) {
+ vcpu_last_completed_iteration[i] = iteration;
+ pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main,
+ &perf_test_args.vcpu_args[i]);
+ }
+
+ return vcpu_threads;
+}
+
+static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus)
+{
+ int i;
+
+ /* Set done to signal the vCPU threads to exit */
+ done = true;
+
+ for (i = 0; i < vcpus; i++)
+ pthread_join(vcpu_threads[i], NULL);
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+ struct test_params *params = arg;
+ struct kvm_vm *vm;
+ pthread_t *vcpu_threads;
+ int vcpus = params->vcpus;
+
+ vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes, 1,
+ params->backing_src);
+
+ perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes,
+ !overlap_memory_access);
+
+ vcpu_threads = create_vcpu_threads(vcpus);
+
+ pr_info("\n");
+ access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory");
+
+ /* As a control, read and write to the populated memory first. */
+ access_memory(vm, vcpus, ACCESS_WRITE, "Writing to populated memory");
+ access_memory(vm, vcpus, ACCESS_READ, "Reading from populated memory");
+
+ /* Repeat on memory that has been marked as idle. */
+ mark_memory_idle(vm, vcpus);
+ access_memory(vm, vcpus, ACCESS_WRITE, "Writing to idle memory");
+ mark_memory_idle(vm, vcpus);
+ access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory");
+
+ terminate_vcpu_threads(vcpu_threads, vcpus);
+ free(vcpu_threads);
+ perf_test_destroy_vm(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-m mode] [-b vcpu_bytes] [-v vcpus] [-o] [-s mem_type]\n",
+ name);
+ puts("");
+ printf(" -h: Display this help message.");
+ guest_modes_help();
+ printf(" -b: specify the size of the memory region which should be\n"
+ " dirtied by each vCPU. e.g. 10M or 3G.\n"
+ " (default: 1G)\n");
+ printf(" -v: specify the number of vCPUs to run.\n");
+ printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+ " them into a separate region of memory for each vCPU.\n");
+ backing_src_help("-s");
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct test_params params = {
+ .backing_src = DEFAULT_VM_MEM_SRC,
+ .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
+ .vcpus = 1,
+ };
+ int page_idle_fd;
+ int opt;
+
+ guest_modes_append_default();
+
+ while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
+ switch (opt) {
+ case 'm':
+ guest_modes_cmdline(optarg);
+ break;
+ case 'b':
+ params.vcpu_memory_bytes = parse_size(optarg);
+ break;
+ case 'v':
+ params.vcpus = atoi(optarg);
+ break;
+ case 'o':
+ overlap_memory_access = true;
+ break;
+ case 's':
+ params.backing_src = parse_backing_src_type(optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+ if (page_idle_fd < 0) {
+ print_skip("CONFIG_IDLE_PAGE_TRACKING is not enabled");
+ exit(KSFT_SKIP);
+ }
+ close(page_idle_fd);
+
+ for_each_guest_mode(run_test, &params);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index b74704305835..1510b21e6306 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -52,7 +52,6 @@ static void *vcpu_worker(void *data)
struct timespec start;
struct timespec ts_diff;
- vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
run = vcpu_state(vm, vcpu_id);
clock_gettime(CLOCK_MONOTONIC, &start);
@@ -180,7 +179,7 @@ static void *uffd_handler_thread_fn(void *arg)
return NULL;
}
- if (!pollfd[0].revents & POLLIN)
+ if (!(pollfd[0].revents & POLLIN))
continue;
r = read(uffd, &msg, sizeof(msg));
@@ -293,7 +292,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
int vcpu_id;
int r;
- vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
+ vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
p->src_type);
perf_test_args.wr_fract = 1;
@@ -417,7 +416,7 @@ static void help(char *name)
{
puts("");
printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
- " [-b memory] [-t type] [-v vcpus] [-o]\n", name);
+ " [-b memory] [-s type] [-v vcpus] [-o]\n", name);
guest_modes_help();
printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
" UFFD registration mode: 'MISSING' or 'MINOR'.\n");
@@ -427,8 +426,7 @@ static void help(char *name)
printf(" -b: specify the size of the memory region which should be\n"
" demand paged by each vCPU. e.g. 10M or 3G.\n"
" Default: 1G\n");
- printf(" -t: The type of backing memory to use. Default: anonymous\n");
- backing_src_help();
+ backing_src_help("-s");
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
@@ -440,14 +438,14 @@ int main(int argc, char *argv[])
{
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
struct test_params p = {
- .src_type = VM_MEM_SRC_ANONYMOUS,
+ .src_type = DEFAULT_VM_MEM_SRC,
.partition_vcpu_memory_access = true,
};
int opt;
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:u:d:b:t:v:o")) != -1) {
+ while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
@@ -466,7 +464,7 @@ int main(int argc, char *argv[])
case 'b':
guest_percpu_mem_size = parse_size(optarg);
break;
- case 't':
+ case 's':
p.src_type = parse_backing_src_type(optarg);
break;
case 'v':
@@ -486,7 +484,7 @@ int main(int argc, char *argv[])
if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
!backing_src_is_shared(p.src_type)) {
- TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -t");
+ TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
}
for_each_guest_mode(run_test, &p);
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 04a2641261be..7ffab5bd5ce5 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -44,7 +44,6 @@ static void *vcpu_worker(void *data)
struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
int vcpu_id = vcpu_args->vcpu_id;
- vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
run = vcpu_state(vm, vcpu_id);
while (!READ_ONCE(host_quit)) {
@@ -94,16 +93,89 @@ struct test_params {
int wr_fract;
bool partition_vcpu_memory_access;
enum vm_mem_backing_src_type backing_src;
+ int slots;
};
+static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
+{
+ int i;
+
+ for (i = 0; i < slots; i++) {
+ int slot = PERF_TEST_MEM_SLOT_INDEX + i;
+ int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0;
+
+ vm_mem_region_set_flags(vm, slot, flags);
+ }
+}
+
+static inline void enable_dirty_logging(struct kvm_vm *vm, int slots)
+{
+ toggle_dirty_logging(vm, slots, true);
+}
+
+static inline void disable_dirty_logging(struct kvm_vm *vm, int slots)
+{
+ toggle_dirty_logging(vm, slots, false);
+}
+
+static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots)
+{
+ int i;
+
+ for (i = 0; i < slots; i++) {
+ int slot = PERF_TEST_MEM_SLOT_INDEX + i;
+
+ kvm_vm_get_dirty_log(vm, slot, bitmaps[i]);
+ }
+}
+
+static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
+ int slots, uint64_t pages_per_slot)
+{
+ int i;
+
+ for (i = 0; i < slots; i++) {
+ int slot = PERF_TEST_MEM_SLOT_INDEX + i;
+
+ kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot);
+ }
+}
+
+static unsigned long **alloc_bitmaps(int slots, uint64_t pages_per_slot)
+{
+ unsigned long **bitmaps;
+ int i;
+
+ bitmaps = malloc(slots * sizeof(bitmaps[0]));
+ TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array.");
+
+ for (i = 0; i < slots; i++) {
+ bitmaps[i] = bitmap_zalloc(pages_per_slot);
+ TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap.");
+ }
+
+ return bitmaps;
+}
+
+static void free_bitmaps(unsigned long *bitmaps[], int slots)
+{
+ int i;
+
+ for (i = 0; i < slots; i++)
+ free(bitmaps[i]);
+
+ free(bitmaps);
+}
+
static void run_test(enum vm_guest_mode mode, void *arg)
{
struct test_params *p = arg;
pthread_t *vcpu_threads;
struct kvm_vm *vm;
- unsigned long *bmap;
+ unsigned long **bitmaps;
uint64_t guest_num_pages;
uint64_t host_num_pages;
+ uint64_t pages_per_slot;
int vcpu_id;
struct timespec start;
struct timespec ts_diff;
@@ -114,14 +186,16 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct timespec clear_dirty_log_total = (struct timespec){0};
vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
- p->backing_src);
+ p->slots, p->backing_src);
perf_test_args.wr_fract = p->wr_fract;
guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
host_num_pages = vm_num_host_pages(mode, guest_num_pages);
- bmap = bitmap_alloc(host_num_pages);
+ pages_per_slot = host_num_pages / p->slots;
+
+ bitmaps = alloc_bitmaps(p->slots, pages_per_slot);
if (dirty_log_manual_caps) {
cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
@@ -163,8 +237,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
/* Enable dirty logging */
clock_gettime(CLOCK_MONOTONIC, &start);
- vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX,
- KVM_MEM_LOG_DIRTY_PAGES);
+ enable_dirty_logging(vm, p->slots);
ts_diff = timespec_elapsed(start);
pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
ts_diff.tv_sec, ts_diff.tv_nsec);
@@ -190,8 +263,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
clock_gettime(CLOCK_MONOTONIC, &start);
- kvm_vm_get_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap);
-
+ get_dirty_log(vm, bitmaps, p->slots);
ts_diff = timespec_elapsed(start);
get_dirty_log_total = timespec_add(get_dirty_log_total,
ts_diff);
@@ -200,9 +272,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
if (dirty_log_manual_caps) {
clock_gettime(CLOCK_MONOTONIC, &start);
- kvm_vm_clear_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap, 0,
- host_num_pages);
-
+ clear_dirty_log(vm, bitmaps, p->slots, pages_per_slot);
ts_diff = timespec_elapsed(start);
clear_dirty_log_total = timespec_add(clear_dirty_log_total,
ts_diff);
@@ -213,7 +283,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
/* Disable dirty logging */
clock_gettime(CLOCK_MONOTONIC, &start);
- vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, 0);
+ disable_dirty_logging(vm, p->slots);
ts_diff = timespec_elapsed(start);
pr_info("Disabling dirty logging time: %ld.%.9lds\n",
ts_diff.tv_sec, ts_diff.tv_nsec);
@@ -235,7 +305,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
}
- free(bmap);
+ free_bitmaps(bitmaps, p->slots);
free(vcpu_threads);
perf_test_destroy_vm(vm);
}
@@ -244,7 +314,8 @@ static void help(char *name)
{
puts("");
printf("usage: %s [-h] [-i iterations] [-p offset] "
- "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]\n", name);
+ "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
+ "[-x memslots]\n", name);
puts("");
printf(" -i: specify iteration counts (default: %"PRIu64")\n",
TEST_HOST_LOOP_N);
@@ -261,9 +332,9 @@ static void help(char *name)
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
- printf(" -s: specify the type of memory that should be used to\n"
- " back the guest data region.\n\n");
- backing_src_help();
+ backing_src_help("-s");
+ printf(" -x: Split the memory region into this number of memslots.\n"
+ " (default: 1)\n");
puts("");
exit(0);
}
@@ -275,7 +346,8 @@ int main(int argc, char *argv[])
.iterations = TEST_HOST_LOOP_N,
.wr_fract = 1,
.partition_vcpu_memory_access = true,
- .backing_src = VM_MEM_SRC_ANONYMOUS,
+ .backing_src = DEFAULT_VM_MEM_SRC,
+ .slots = 1,
};
int opt;
@@ -286,7 +358,7 @@ int main(int argc, char *argv[])
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:os:")) != -1) {
+ while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:os:x:")) != -1) {
switch (opt) {
case 'i':
p.iterations = atoi(optarg);
@@ -312,9 +384,13 @@ int main(int argc, char *argv[])
break;
case 'o':
p.partition_vcpu_memory_access = false;
+ break;
case 's':
p.backing_src = parse_backing_src_type(optarg);
break;
+ case 'x':
+ p.slots = atoi(optarg);
+ break;
case 'h':
default:
help(argv[0]);
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 81edbd23d371..792c60e1b17d 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -16,7 +16,6 @@
#include <errno.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>
-#include <asm/barrier.h>
#include <linux/atomic.h>
#include "kvm_util.h"
@@ -681,7 +680,7 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ kvm_vm_elf_load(vm, program_invocation_name);
#ifdef __x86_64__
vm_create_irqchip(vm);
#endif
@@ -750,8 +749,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
- bmap = bitmap_alloc(host_num_pages);
- host_bmap_track = bitmap_alloc(host_num_pages);
+ bmap = bitmap_zalloc(host_num_pages);
+ host_bmap_track = bitmap_zalloc(host_num_pages);
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -761,7 +760,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
KVM_MEM_LOG_DIRTY_PAGES);
/* Do mapping for the dirty track memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
/* Cache the HVA pointer of the region */
host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
index 4b8db3bce610..b21c69a56daa 100644
--- a/tools/testing/selftests/kvm/hardware_disable_test.c
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -105,7 +105,7 @@ static void run_test(uint32_t run)
CPU_SET(i, &cpu_set);
vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ kvm_vm_elf_load(vm, program_invocation_name);
vm_create_irqchip(vm);
pr_debug("%s: [%d] start vcpus\n", __func__, run);
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index b7fa0c8551db..c0273aefa63d 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -8,16 +8,21 @@
#define SELFTEST_KVM_PROCESSOR_H
#include "kvm_util.h"
+#include <linux/stringify.h>
#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
-#define CPACR_EL1 3, 0, 1, 0, 2
-#define TCR_EL1 3, 0, 2, 0, 2
-#define MAIR_EL1 3, 0, 10, 2, 0
-#define TTBR0_EL1 3, 0, 2, 0, 0
-#define SCTLR_EL1 3, 0, 1, 0, 0
+#define CPACR_EL1 3, 0, 1, 0, 2
+#define TCR_EL1 3, 0, 2, 0, 2
+#define MAIR_EL1 3, 0, 10, 2, 0
+#define MPIDR_EL1 3, 0, 0, 0, 5
+#define TTBR0_EL1 3, 0, 2, 0, 0
+#define SCTLR_EL1 3, 0, 1, 0, 0
+#define VBAR_EL1 3, 0, 12, 0, 0
+
+#define ID_AA64DFR0_EL1 3, 0, 0, 5, 0
/*
* Default MAIR
@@ -36,6 +41,8 @@
(0xfful << (4 * 8)) | \
(0xbbul << (5 * 8)))
+#define MPIDR_HWID_BITMASK (0xff00fffffful)
+
static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t *addr)
{
struct kvm_one_reg reg;
@@ -56,4 +63,73 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_init *init, void *guest_code);
+struct ex_regs {
+ u64 regs[31];
+ u64 sp;
+ u64 pc;
+ u64 pstate;
+};
+
+#define VECTOR_NUM 16
+
+enum {
+ VECTOR_SYNC_CURRENT_SP0,
+ VECTOR_IRQ_CURRENT_SP0,
+ VECTOR_FIQ_CURRENT_SP0,
+ VECTOR_ERROR_CURRENT_SP0,
+
+ VECTOR_SYNC_CURRENT,
+ VECTOR_IRQ_CURRENT,
+ VECTOR_FIQ_CURRENT,
+ VECTOR_ERROR_CURRENT,
+
+ VECTOR_SYNC_LOWER_64,
+ VECTOR_IRQ_LOWER_64,
+ VECTOR_FIQ_LOWER_64,
+ VECTOR_ERROR_LOWER_64,
+
+ VECTOR_SYNC_LOWER_32,
+ VECTOR_IRQ_LOWER_32,
+ VECTOR_FIQ_LOWER_32,
+ VECTOR_ERROR_LOWER_32,
+};
+
+#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \
+ (v) == VECTOR_SYNC_CURRENT || \
+ (v) == VECTOR_SYNC_LOWER_64 || \
+ (v) == VECTOR_SYNC_LOWER_32)
+
+#define ESR_EC_NUM 64
+#define ESR_EC_SHIFT 26
+#define ESR_EC_MASK (ESR_EC_NUM - 1)
+
+#define ESR_EC_SVC64 0x15
+#define ESR_EC_HW_BP_CURRENT 0x31
+#define ESR_EC_SSTEP_CURRENT 0x33
+#define ESR_EC_WP_CURRENT 0x35
+#define ESR_EC_BRK_INS 0x3c
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
+
+typedef void(*handler_fn)(struct ex_regs *);
+void vm_install_exception_handler(struct kvm_vm *vm,
+ int vector, handler_fn handler);
+void vm_install_sync_handler(struct kvm_vm *vm,
+ int vector, int ec, handler_fn handler);
+
+#define write_sysreg(reg, val) \
+({ \
+ u64 __val = (u64)(val); \
+ asm volatile("msr " __stringify(reg) ", %x0" : : "rZ" (__val)); \
+})
+
+#define read_sysreg(reg) \
+({ u64 val; \
+ asm volatile("mrs %0, "__stringify(reg) : "=r"(val) : : "memory");\
+ val; \
+})
+
+#define isb() asm volatile("isb" : : : "memory")
+
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 35739567189e..010b59b13917 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -30,6 +30,7 @@ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
/* Minimum allocated guest virtual and physical addresses */
#define KVM_UTIL_MIN_VADDR 0x2000
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
#define DEFAULT_GUEST_PHY_PAGES 512
#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
@@ -44,6 +45,7 @@ enum vm_guest_mode {
VM_MODE_P40V48_64K,
VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
VM_MODE_P47V64_4K,
+ VM_MODE_P44V64_4K,
NUM_VM_MODES,
};
@@ -61,7 +63,7 @@ enum vm_guest_mode {
#elif defined(__s390x__)
-#define VM_MODE_DEFAULT VM_MODE_P47V64_4K
+#define VM_MODE_DEFAULT VM_MODE_P44V64_4K
#define MIN_PAGE_SHIFT 12U
#define ptes_per_page(page_size) ((page_size) / 16)
@@ -98,8 +100,7 @@ uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm);
int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
size_t len);
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
- uint32_t data_memslot, uint32_t pgd_memslot);
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
@@ -141,10 +142,12 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- uint32_t data_memslot, uint32_t pgd_memslot);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
+
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages, uint32_t pgd_memslot);
+ unsigned int npages);
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
@@ -237,7 +240,7 @@ int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
const char *exit_reason_str(unsigned int exit_reason);
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
+void virt_pgd_alloc(struct kvm_vm *vm);
/*
* VM Virtual Page Map
@@ -255,13 +258,13 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
* Within @vm, creates a virtual translation for the page starting
* at @vaddr to the page starting at @paddr.
*/
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t memslot);
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
uint32_t memslot);
vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
vm_paddr_t paddr_min, uint32_t memslot);
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
/*
* Create a VM with reasonable defaults
@@ -351,6 +354,7 @@ enum {
UCALL_SYNC,
UCALL_ABORT,
UCALL_DONE,
+ UCALL_UNHANDLED,
};
#define UCALL_MAX_ARGS 6
@@ -369,26 +373,31 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
#define GUEST_DONE() ucall(UCALL_DONE, 0)
-#define __GUEST_ASSERT(_condition, _nargs, _args...) do { \
- if (!(_condition)) \
- ucall(UCALL_ABORT, 2 + _nargs, \
- "Failed guest assert: " \
- #_condition, __LINE__, _args); \
+#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do { \
+ if (!(_condition)) \
+ ucall(UCALL_ABORT, 2 + _nargs, \
+ "Failed guest assert: " \
+ _condstr, __LINE__, _args); \
} while (0)
#define GUEST_ASSERT(_condition) \
- __GUEST_ASSERT((_condition), 0, 0)
+ __GUEST_ASSERT(_condition, #_condition, 0, 0)
#define GUEST_ASSERT_1(_condition, arg1) \
- __GUEST_ASSERT((_condition), 1, (arg1))
+ __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
#define GUEST_ASSERT_2(_condition, arg1, arg2) \
- __GUEST_ASSERT((_condition), 2, (arg1), (arg2))
+ __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
- __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3))
+ __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
- __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
+ __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
+
+#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
+
+int vm_get_stats_fd(struct kvm_vm *vm);
+int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid);
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index 005f2143adeb..df9f1a3a3ffb 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -44,7 +44,7 @@ extern struct perf_test_args perf_test_args;
extern uint64_t guest_test_phys_mem;
struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
- uint64_t vcpu_memory_bytes,
+ uint64_t vcpu_memory_bytes, int slots,
enum vm_mem_backing_src_type backing_src);
void perf_test_destroy_vm(struct kvm_vm *vm);
void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index d79be15dd3d2..f8fddc84c0d3 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -90,18 +90,23 @@ enum vm_mem_backing_src_type {
NUM_SRC_TYPES,
};
+#define DEFAULT_VM_MEM_SRC VM_MEM_SRC_ANONYMOUS
+
struct vm_mem_backing_src_alias {
const char *name;
uint32_t flag;
};
+#define MIN_RUN_DELAY_NS 200000UL
+
bool thp_configured(void);
size_t get_trans_hugepagesz(void);
size_t get_def_hugetlb_pagesz(void);
const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i);
size_t get_backing_src_pagesz(uint32_t i);
-void backing_src_help(void);
+void backing_src_help(const char *flag);
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
+long get_run_delay(void);
/*
* Whether or not the given source type is shared memory (as opposed to
diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h
new file mode 100644
index 000000000000..0be4757f1f20
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/apic.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/apic.h
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_APIC_H
+#define SELFTEST_KVM_APIC_H
+
+#include <stdint.h>
+
+#include "processor.h"
+
+#define APIC_DEFAULT_GPA 0xfee00000ULL
+
+/* APIC base address MSR and fields */
+#define MSR_IA32_APICBASE 0x0000001b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_EXTD (1<<10)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+#define GET_APIC_BASE(x) (((x) >> 12) << 12)
+
+#define APIC_BASE_MSR 0x800
+#define X2APIC_ENABLE (1UL << 10)
+#define APIC_ID 0x20
+#define APIC_LVR 0x30
+#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
+#define APIC_TASKPRI 0x80
+#define APIC_PROCPRI 0xA0
+#define APIC_EOI 0xB0
+#define APIC_SPIV 0xF0
+#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
+#define APIC_SPIV_APIC_ENABLED (1 << 8)
+#define APIC_ICR 0x300
+#define APIC_DEST_SELF 0x40000
+#define APIC_DEST_ALLINC 0x80000
+#define APIC_DEST_ALLBUT 0xC0000
+#define APIC_ICR_RR_MASK 0x30000
+#define APIC_ICR_RR_INVALID 0x00000
+#define APIC_ICR_RR_INPROG 0x10000
+#define APIC_ICR_RR_VALID 0x20000
+#define APIC_INT_LEVELTRIG 0x08000
+#define APIC_INT_ASSERT 0x04000
+#define APIC_ICR_BUSY 0x01000
+#define APIC_DEST_LOGICAL 0x00800
+#define APIC_DEST_PHYSICAL 0x00000
+#define APIC_DM_FIXED 0x00000
+#define APIC_DM_FIXED_MASK 0x00700
+#define APIC_DM_LOWEST 0x00100
+#define APIC_DM_SMI 0x00200
+#define APIC_DM_REMRD 0x00300
+#define APIC_DM_NMI 0x00400
+#define APIC_DM_INIT 0x00500
+#define APIC_DM_STARTUP 0x00600
+#define APIC_DM_EXTINT 0x00700
+#define APIC_VECTOR_MASK 0x000FF
+#define APIC_ICR2 0x310
+#define SET_APIC_DEST_FIELD(x) ((x) << 24)
+
+void apic_disable(void);
+void xapic_enable(void);
+void x2apic_enable(void);
+
+static inline uint32_t get_bsp_flag(void)
+{
+ return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
+}
+
+static inline uint32_t xapic_read_reg(unsigned int reg)
+{
+ return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2];
+}
+
+static inline void xapic_write_reg(unsigned int reg, uint32_t val)
+{
+ ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val;
+}
+
+static inline uint64_t x2apic_read_reg(unsigned int reg)
+{
+ return rdmsr(APIC_BASE_MSR + (reg >> 4));
+}
+
+static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
+{
+ wrmsr(APIC_BASE_MSR + (reg >> 4), value);
+}
+
+#endif /* SELFTEST_KVM_APIC_H */
diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
index a034438b6266..c9af97abd622 100644
--- a/tools/testing/selftests/kvm/include/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * tools/testing/selftests/kvm/include/vmx.h
+ * tools/testing/selftests/kvm/include/x86_64/evmcs.h
*
* Copyright (C) 2018, Red Hat, Inc.
*
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
new file mode 100644
index 000000000000..b66910702c0a
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -0,0 +1,188 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/hyperv.h
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ */
+
+#ifndef SELFTEST_KVM_HYPERV_H
+#define SELFTEST_KVM_HYPERV_H
+
+#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
+#define HYPERV_CPUID_INTERFACE 0x40000001
+#define HYPERV_CPUID_VERSION 0x40000002
+#define HYPERV_CPUID_FEATURES 0x40000003
+#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
+#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
+#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007
+#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A
+#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS 0x40000080
+#define HYPERV_CPUID_SYNDBG_INTERFACE 0x40000081
+#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES 0x40000082
+
+#define HV_X64_MSR_GUEST_OS_ID 0x40000000
+#define HV_X64_MSR_HYPERCALL 0x40000001
+#define HV_X64_MSR_VP_INDEX 0x40000002
+#define HV_X64_MSR_RESET 0x40000003
+#define HV_X64_MSR_VP_RUNTIME 0x40000010
+#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
+#define HV_X64_MSR_REFERENCE_TSC 0x40000021
+#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
+#define HV_X64_MSR_APIC_FREQUENCY 0x40000023
+#define HV_X64_MSR_EOI 0x40000070
+#define HV_X64_MSR_ICR 0x40000071
+#define HV_X64_MSR_TPR 0x40000072
+#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
+#define HV_X64_MSR_SCONTROL 0x40000080
+#define HV_X64_MSR_SVERSION 0x40000081
+#define HV_X64_MSR_SIEFP 0x40000082
+#define HV_X64_MSR_SIMP 0x40000083
+#define HV_X64_MSR_EOM 0x40000084
+#define HV_X64_MSR_SINT0 0x40000090
+#define HV_X64_MSR_SINT1 0x40000091
+#define HV_X64_MSR_SINT2 0x40000092
+#define HV_X64_MSR_SINT3 0x40000093
+#define HV_X64_MSR_SINT4 0x40000094
+#define HV_X64_MSR_SINT5 0x40000095
+#define HV_X64_MSR_SINT6 0x40000096
+#define HV_X64_MSR_SINT7 0x40000097
+#define HV_X64_MSR_SINT8 0x40000098
+#define HV_X64_MSR_SINT9 0x40000099
+#define HV_X64_MSR_SINT10 0x4000009A
+#define HV_X64_MSR_SINT11 0x4000009B
+#define HV_X64_MSR_SINT12 0x4000009C
+#define HV_X64_MSR_SINT13 0x4000009D
+#define HV_X64_MSR_SINT14 0x4000009E
+#define HV_X64_MSR_SINT15 0x4000009F
+#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0
+#define HV_X64_MSR_STIMER0_COUNT 0x400000B1
+#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2
+#define HV_X64_MSR_STIMER1_COUNT 0x400000B3
+#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4
+#define HV_X64_MSR_STIMER2_COUNT 0x400000B5
+#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6
+#define HV_X64_MSR_STIMER3_COUNT 0x400000B7
+#define HV_X64_MSR_GUEST_IDLE 0x400000F0
+#define HV_X64_MSR_CRASH_P0 0x40000100
+#define HV_X64_MSR_CRASH_P1 0x40000101
+#define HV_X64_MSR_CRASH_P2 0x40000102
+#define HV_X64_MSR_CRASH_P3 0x40000103
+#define HV_X64_MSR_CRASH_P4 0x40000104
+#define HV_X64_MSR_CRASH_CTL 0x40000105
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
+#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
+#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
+#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
+
+#define HV_X64_MSR_SYNDBG_CONTROL 0x400000F1
+#define HV_X64_MSR_SYNDBG_STATUS 0x400000F2
+#define HV_X64_MSR_SYNDBG_SEND_BUFFER 0x400000F3
+#define HV_X64_MSR_SYNDBG_RECV_BUFFER 0x400000F4
+#define HV_X64_MSR_SYNDBG_PENDING_BUFFER 0x400000F5
+#define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF
+
+/* HYPERV_CPUID_FEATURES.EAX */
+#define HV_MSR_VP_RUNTIME_AVAILABLE BIT(0)
+#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1)
+#define HV_MSR_SYNIC_AVAILABLE BIT(2)
+#define HV_MSR_SYNTIMER_AVAILABLE BIT(3)
+#define HV_MSR_APIC_ACCESS_AVAILABLE BIT(4)
+#define HV_MSR_HYPERCALL_AVAILABLE BIT(5)
+#define HV_MSR_VP_INDEX_AVAILABLE BIT(6)
+#define HV_MSR_RESET_AVAILABLE BIT(7)
+#define HV_MSR_STAT_PAGES_AVAILABLE BIT(8)
+#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9)
+#define HV_MSR_GUEST_IDLE_AVAILABLE BIT(10)
+#define HV_ACCESS_FREQUENCY_MSRS BIT(11)
+#define HV_ACCESS_REENLIGHTENMENT BIT(13)
+#define HV_ACCESS_TSC_INVARIANT BIT(15)
+
+/* HYPERV_CPUID_FEATURES.EBX */
+#define HV_CREATE_PARTITIONS BIT(0)
+#define HV_ACCESS_PARTITION_ID BIT(1)
+#define HV_ACCESS_MEMORY_POOL BIT(2)
+#define HV_ADJUST_MESSAGE_BUFFERS BIT(3)
+#define HV_POST_MESSAGES BIT(4)
+#define HV_SIGNAL_EVENTS BIT(5)
+#define HV_CREATE_PORT BIT(6)
+#define HV_CONNECT_PORT BIT(7)
+#define HV_ACCESS_STATS BIT(8)
+#define HV_DEBUGGING BIT(11)
+#define HV_CPU_MANAGEMENT BIT(12)
+#define HV_ISOLATION BIT(22)
+
+/* HYPERV_CPUID_FEATURES.EDX */
+#define HV_X64_MWAIT_AVAILABLE BIT(0)
+#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1)
+#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2)
+#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4)
+#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5)
+#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8)
+#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10)
+#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11)
+#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19)
+
+/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
+#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0)
+#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1)
+#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2)
+#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3)
+#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4)
+#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5)
+#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9)
+#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10)
+#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
+#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
+
+/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING BIT(1)
+
+/* Hypercalls */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
+#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
+#define HVCALL_SEND_IPI 0x000b
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
+#define HVCALL_SEND_IPI_EX 0x0015
+#define HVCALL_GET_PARTITION_ID 0x0046
+#define HVCALL_DEPOSIT_MEMORY 0x0048
+#define HVCALL_CREATE_VP 0x004e
+#define HVCALL_GET_VP_REGISTERS 0x0050
+#define HVCALL_SET_VP_REGISTERS 0x0051
+#define HVCALL_POST_MESSAGE 0x005c
+#define HVCALL_SIGNAL_EVENT 0x005d
+#define HVCALL_POST_DEBUG_DATA 0x0069
+#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a
+#define HVCALL_RESET_DEBUG_SESSION 0x006b
+#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076
+#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
+#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
+#define HVCALL_RETARGET_INTERRUPT 0x007e
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
+
+#define HV_FLUSH_ALL_PROCESSORS BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
+/* hypercall status code */
+#define HV_STATUS_SUCCESS 0
+#define HV_STATUS_INVALID_HYPERCALL_CODE 2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
+#define HV_STATUS_INVALID_ALIGNMENT 4
+#define HV_STATUS_INVALID_PARAMETER 5
+#define HV_STATUS_ACCESS_DENIED 6
+#define HV_STATUS_OPERATION_DENIED 8
+#define HV_STATUS_INSUFFICIENT_MEMORY 11
+#define HV_STATUS_INVALID_PORT_ID 17
+#define HV_STATUS_INVALID_CONNECTION_ID 18
+#define HV_STATUS_INSUFFICIENT_BUFFERS 19
+
+/* hypercall options */
+#define HV_HYPERCALL_FAST_BIT BIT(16)
+
+#endif /* !SELFTEST_KVM_HYPERV_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 0b30b4e15c38..05e65ca1c30c 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -13,6 +13,8 @@
#include <asm/msr-index.h>
+#include "../kvm_util.h"
+
#define X86_EFLAGS_FIXED (1u << 1)
#define X86_CR4_VME (1ul << 0)
@@ -53,7 +55,8 @@
#define CPUID_PKU (1ul << 3)
#define CPUID_LA57 (1ul << 16)
-#define UNEXPECTED_VECTOR_PORT 0xfff0u
+/* CPUID.0x8000_0001.EDX */
+#define CPUID_GBPAGES (1ul << 26)
/* General Registers in 64-Bit Mode */
struct gpr64_regs {
@@ -309,37 +312,37 @@ static inline void set_xmm(int n, unsigned long val)
}
}
-typedef unsigned long v1di __attribute__ ((vector_size (8)));
+#define GET_XMM(__xmm) \
+({ \
+ unsigned long __val; \
+ asm volatile("movq %%"#__xmm", %0" : "=r"(__val)); \
+ __val; \
+})
+
static inline unsigned long get_xmm(int n)
{
assert(n >= 0 && n <= 7);
- register v1di xmm0 __asm__("%xmm0");
- register v1di xmm1 __asm__("%xmm1");
- register v1di xmm2 __asm__("%xmm2");
- register v1di xmm3 __asm__("%xmm3");
- register v1di xmm4 __asm__("%xmm4");
- register v1di xmm5 __asm__("%xmm5");
- register v1di xmm6 __asm__("%xmm6");
- register v1di xmm7 __asm__("%xmm7");
switch (n) {
case 0:
- return (unsigned long)xmm0;
+ return GET_XMM(xmm0);
case 1:
- return (unsigned long)xmm1;
+ return GET_XMM(xmm1);
case 2:
- return (unsigned long)xmm2;
+ return GET_XMM(xmm2);
case 3:
- return (unsigned long)xmm3;
+ return GET_XMM(xmm3);
case 4:
- return (unsigned long)xmm4;
+ return GET_XMM(xmm4);
case 5:
- return (unsigned long)xmm5;
+ return GET_XMM(xmm5);
case 6:
- return (unsigned long)xmm6;
+ return GET_XMM(xmm6);
case 7:
- return (unsigned long)xmm7;
+ return GET_XMM(xmm7);
}
+
+ /* never reached */
return 0;
}
@@ -391,9 +394,13 @@ struct ex_regs {
void vm_init_descriptor_tables(struct kvm_vm *vm);
void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
-void vm_handle_exception(struct kvm_vm *vm, int vector,
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
void (*handler)(struct ex_regs *));
+uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr);
+void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
+ uint64_t pte);
+
/*
* set_cpuid() - overwrites a matching cpuid entry with the provided value.
* matches based on ent->function && ent->index. returns true
@@ -410,6 +417,14 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
+enum x86_page_size {
+ X86_PAGE_SIZE_4K = 0,
+ X86_PAGE_SIZE_2M,
+ X86_PAGE_SIZE_1G,
+};
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ enum x86_page_size page_size);
+
/*
* Basic CPU control in CR0
*/
@@ -425,53 +440,6 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */
-#define APIC_DEFAULT_GPA 0xfee00000ULL
-
-/* APIC base address MSR and fields */
-#define MSR_IA32_APICBASE 0x0000001b
-#define MSR_IA32_APICBASE_BSP (1<<8)
-#define MSR_IA32_APICBASE_EXTD (1<<10)
-#define MSR_IA32_APICBASE_ENABLE (1<<11)
-#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-#define GET_APIC_BASE(x) (((x) >> 12) << 12)
-
-#define APIC_BASE_MSR 0x800
-#define X2APIC_ENABLE (1UL << 10)
-#define APIC_ID 0x20
-#define APIC_LVR 0x30
-#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
-#define APIC_TASKPRI 0x80
-#define APIC_PROCPRI 0xA0
-#define APIC_EOI 0xB0
-#define APIC_SPIV 0xF0
-#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
-#define APIC_SPIV_APIC_ENABLED (1 << 8)
-#define APIC_ICR 0x300
-#define APIC_DEST_SELF 0x40000
-#define APIC_DEST_ALLINC 0x80000
-#define APIC_DEST_ALLBUT 0xC0000
-#define APIC_ICR_RR_MASK 0x30000
-#define APIC_ICR_RR_INVALID 0x00000
-#define APIC_ICR_RR_INPROG 0x10000
-#define APIC_ICR_RR_VALID 0x20000
-#define APIC_INT_LEVELTRIG 0x08000
-#define APIC_INT_ASSERT 0x04000
-#define APIC_ICR_BUSY 0x01000
-#define APIC_DEST_LOGICAL 0x00800
-#define APIC_DEST_PHYSICAL 0x00000
-#define APIC_DM_FIXED 0x00000
-#define APIC_DM_FIXED_MASK 0x00700
-#define APIC_DM_LOWEST 0x00100
-#define APIC_DM_SMI 0x00200
-#define APIC_DM_REMRD 0x00300
-#define APIC_DM_NMI 0x00400
-#define APIC_DM_INIT 0x00500
-#define APIC_DM_STARTUP 0x00600
-#define APIC_DM_EXTINT 0x00700
-#define APIC_VECTOR_MASK 0x000FF
-#define APIC_ICR2 0x310
-#define SET_APIC_DEST_FIELD(x) ((x) << 24)
-
/* VMX_EPT_VPID_CAP bits */
#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 65eb1079a161..583ceb0d1457 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -10,6 +10,7 @@
#include <stdint.h>
#include "processor.h"
+#include "apic.h"
/*
* Definitions of Primary Processor-Based VM-Execution Controls.
@@ -607,15 +608,13 @@ bool nested_vmx_supported(void);
void nested_vmx_check_supported(void);
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot);
+ uint64_t nested_paddr, uint64_t paddr);
void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size,
- uint32_t eptp_memslot);
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size);
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot, uint32_t eptp_memslot);
+ uint32_t memslot);
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot);
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot);
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
new file mode 100644
index 000000000000..17f65d514915
--- /dev/null
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kvm_binary_stats_test
+ *
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Test the fd-based interface for KVM statistics.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+
+static void stats_test(int stats_fd)
+{
+ ssize_t ret;
+ int i;
+ size_t size_desc;
+ size_t size_data = 0;
+ struct kvm_stats_header *header;
+ char *id;
+ struct kvm_stats_desc *stats_desc;
+ u64 *stats_data;
+ struct kvm_stats_desc *pdesc;
+
+ /* Read kvm stats header */
+ header = malloc(sizeof(*header));
+ TEST_ASSERT(header, "Allocate memory for stats header");
+
+ ret = read(stats_fd, header, sizeof(*header));
+ TEST_ASSERT(ret == sizeof(*header), "Read stats header");
+ size_desc = sizeof(*stats_desc) + header->name_size;
+
+ /* Read kvm stats id string */
+ id = malloc(header->name_size);
+ TEST_ASSERT(id, "Allocate memory for id string");
+ ret = read(stats_fd, id, header->name_size);
+ TEST_ASSERT(ret == header->name_size, "Read id string");
+
+ /* Check id string, that should start with "kvm" */
+ TEST_ASSERT(!strncmp(id, "kvm", 3) && strlen(id) < header->name_size,
+ "Invalid KVM stats type, id: %s", id);
+
+ /* Sanity check for other fields in header */
+ if (header->num_desc == 0) {
+ printf("No KVM stats defined!");
+ return;
+ }
+ /* Check overlap */
+ TEST_ASSERT(header->desc_offset > 0 && header->data_offset > 0
+ && header->desc_offset >= sizeof(*header)
+ && header->data_offset >= sizeof(*header),
+ "Invalid offset fields in header");
+ TEST_ASSERT(header->desc_offset > header->data_offset ||
+ (header->desc_offset + size_desc * header->num_desc <=
+ header->data_offset),
+ "Descriptor block is overlapped with data block");
+
+ /* Allocate memory for stats descriptors */
+ stats_desc = calloc(header->num_desc, size_desc);
+ TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");
+ /* Read kvm stats descriptors */
+ ret = pread(stats_fd, stats_desc,
+ size_desc * header->num_desc, header->desc_offset);
+ TEST_ASSERT(ret == size_desc * header->num_desc,
+ "Read KVM stats descriptors");
+
+ /* Sanity check for fields in descriptors */
+ for (i = 0; i < header->num_desc; ++i) {
+ pdesc = (void *)stats_desc + i * size_desc;
+ /* Check type,unit,base boundaries */
+ TEST_ASSERT((pdesc->flags & KVM_STATS_TYPE_MASK)
+ <= KVM_STATS_TYPE_MAX, "Unknown KVM stats type");
+ TEST_ASSERT((pdesc->flags & KVM_STATS_UNIT_MASK)
+ <= KVM_STATS_UNIT_MAX, "Unknown KVM stats unit");
+ TEST_ASSERT((pdesc->flags & KVM_STATS_BASE_MASK)
+ <= KVM_STATS_BASE_MAX, "Unknown KVM stats base");
+ /* Check exponent for stats unit
+ * Exponent for counter should be greater than or equal to 0
+ * Exponent for unit bytes should be greater than or equal to 0
+ * Exponent for unit seconds should be less than or equal to 0
+ * Exponent for unit clock cycles should be greater than or
+ * equal to 0
+ */
+ switch (pdesc->flags & KVM_STATS_UNIT_MASK) {
+ case KVM_STATS_UNIT_NONE:
+ case KVM_STATS_UNIT_BYTES:
+ case KVM_STATS_UNIT_CYCLES:
+ TEST_ASSERT(pdesc->exponent >= 0,
+ "Unsupported KVM stats unit");
+ break;
+ case KVM_STATS_UNIT_SECONDS:
+ TEST_ASSERT(pdesc->exponent <= 0,
+ "Unsupported KVM stats unit");
+ break;
+ }
+ /* Check name string */
+ TEST_ASSERT(strlen(pdesc->name) < header->name_size,
+ "KVM stats name(%s) too long", pdesc->name);
+ /* Check size field, which should not be zero */
+ TEST_ASSERT(pdesc->size, "KVM descriptor(%s) with size of 0",
+ pdesc->name);
+ /* Check bucket_size field */
+ switch (pdesc->flags & KVM_STATS_TYPE_MASK) {
+ case KVM_STATS_TYPE_LINEAR_HIST:
+ TEST_ASSERT(pdesc->bucket_size,
+ "Bucket size of Linear Histogram stats (%s) is zero",
+ pdesc->name);
+ break;
+ default:
+ TEST_ASSERT(!pdesc->bucket_size,
+ "Bucket size of stats (%s) is not zero",
+ pdesc->name);
+ }
+ size_data += pdesc->size * sizeof(*stats_data);
+ }
+ /* Check overlap */
+ TEST_ASSERT(header->data_offset >= header->desc_offset
+ || header->data_offset + size_data <= header->desc_offset,
+ "Data block is overlapped with Descriptor block");
+ /* Check validity of all stats data size */
+ TEST_ASSERT(size_data >= header->num_desc * sizeof(*stats_data),
+ "Data size is not correct");
+ /* Check stats offset */
+ for (i = 0; i < header->num_desc; ++i) {
+ pdesc = (void *)stats_desc + i * size_desc;
+ TEST_ASSERT(pdesc->offset < size_data,
+ "Invalid offset (%u) for stats: %s",
+ pdesc->offset, pdesc->name);
+ }
+
+ /* Allocate memory for stats data */
+ stats_data = malloc(size_data);
+ TEST_ASSERT(stats_data, "Allocate memory for stats data");
+ /* Read kvm stats data as a bulk */
+ ret = pread(stats_fd, stats_data, size_data, header->data_offset);
+ TEST_ASSERT(ret == size_data, "Read KVM stats data");
+ /* Read kvm stats data one by one */
+ size_data = 0;
+ for (i = 0; i < header->num_desc; ++i) {
+ pdesc = (void *)stats_desc + i * size_desc;
+ ret = pread(stats_fd, stats_data,
+ pdesc->size * sizeof(*stats_data),
+ header->data_offset + size_data);
+ TEST_ASSERT(ret == pdesc->size * sizeof(*stats_data),
+ "Read data of KVM stats: %s", pdesc->name);
+ size_data += pdesc->size * sizeof(*stats_data);
+ }
+
+ free(stats_data);
+ free(stats_desc);
+ free(id);
+ free(header);
+}
+
+
+static void vm_stats_test(struct kvm_vm *vm)
+{
+ int stats_fd;
+
+ /* Get fd for VM stats */
+ stats_fd = vm_get_stats_fd(vm);
+ TEST_ASSERT(stats_fd >= 0, "Get VM stats fd");
+
+ stats_test(stats_fd);
+ close(stats_fd);
+ TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
+}
+
+static void vcpu_stats_test(struct kvm_vm *vm, int vcpu_id)
+{
+ int stats_fd;
+
+ /* Get fd for VCPU stats */
+ stats_fd = vcpu_get_stats_fd(vm, vcpu_id);
+ TEST_ASSERT(stats_fd >= 0, "Get VCPU stats fd");
+
+ stats_test(stats_fd);
+ close(stats_fd);
+ TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
+}
+
+#define DEFAULT_NUM_VM 4
+#define DEFAULT_NUM_VCPU 4
+
+/*
+ * Usage: kvm_bin_form_stats [#vm] [#vcpu]
+ * The first parameter #vm set the number of VMs being created.
+ * The second parameter #vcpu set the number of VCPUs being created.
+ * By default, DEFAULT_NUM_VM VM and DEFAULT_NUM_VCPU VCPU for the VM would be
+ * created for testing.
+ */
+
+int main(int argc, char *argv[])
+{
+ int i, j;
+ struct kvm_vm **vms;
+ int max_vm = DEFAULT_NUM_VM;
+ int max_vcpu = DEFAULT_NUM_VCPU;
+
+ /* Get the number of VMs and VCPUs that would be created for testing. */
+ if (argc > 1) {
+ max_vm = strtol(argv[1], NULL, 0);
+ if (max_vm <= 0)
+ max_vm = DEFAULT_NUM_VM;
+ }
+ if (argc > 2) {
+ max_vcpu = strtol(argv[2], NULL, 0);
+ if (max_vcpu <= 0)
+ max_vcpu = DEFAULT_NUM_VCPU;
+ }
+
+ /* Check the extension for binary stats */
+ if (kvm_check_cap(KVM_CAP_BINARY_STATS_FD) <= 0) {
+ print_skip("Binary form statistics interface is not supported");
+ exit(KSFT_SKIP);
+ }
+
+ /* Create VMs and VCPUs */
+ vms = malloc(sizeof(vms[0]) * max_vm);
+ TEST_ASSERT(vms, "Allocate memory for storing VM pointers");
+ for (i = 0; i < max_vm; ++i) {
+ vms[i] = vm_create(VM_MODE_DEFAULT,
+ DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ for (j = 0; j < max_vcpu; ++j)
+ vm_vcpu_add(vms[i], j);
+ }
+
+ /* Check stats read for every VM and VCPU */
+ for (i = 0; i < max_vm; ++i) {
+ vm_stats_test(vms[i]);
+ for (j = 0; j < max_vcpu; ++j)
+ vcpu_stats_test(vms[i], j);
+ }
+
+ for (i = 0; i < max_vm; ++i)
+ kvm_vm_free(vms[i]);
+ free(vms);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 82171f17c1d7..36407cb0ec85 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -303,7 +303,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
TEST_MEM_SLOT_INDEX, guest_num_pages, 0);
/* Do mapping(GVA->GPA) for the testing memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
/* Cache the HVA pointer of the region */
host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
@@ -456,10 +456,7 @@ static void help(char *name)
" (default: 1G)\n");
printf(" -v: specify the number of vCPUs to run\n"
" (default: 1)\n");
- printf(" -s: specify the type of memory that should be used to\n"
- " back the guest data region.\n"
- " (default: anonymous)\n\n");
- backing_src_help();
+ backing_src_help("-s");
puts("");
}
@@ -468,7 +465,7 @@ int main(int argc, char *argv[])
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
struct test_params p = {
.test_mem_size = DEFAULT_TEST_MEM_SIZE,
- .src_type = VM_MEM_SRC_ANONYMOUS,
+ .src_type = DEFAULT_VM_MEM_SRC,
};
int opt;
diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
new file mode 100644
index 000000000000..0e443eadfac6
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+.macro save_registers
+ add sp, sp, #-16 * 17
+
+ stp x0, x1, [sp, #16 * 0]
+ stp x2, x3, [sp, #16 * 1]
+ stp x4, x5, [sp, #16 * 2]
+ stp x6, x7, [sp, #16 * 3]
+ stp x8, x9, [sp, #16 * 4]
+ stp x10, x11, [sp, #16 * 5]
+ stp x12, x13, [sp, #16 * 6]
+ stp x14, x15, [sp, #16 * 7]
+ stp x16, x17, [sp, #16 * 8]
+ stp x18, x19, [sp, #16 * 9]
+ stp x20, x21, [sp, #16 * 10]
+ stp x22, x23, [sp, #16 * 11]
+ stp x24, x25, [sp, #16 * 12]
+ stp x26, x27, [sp, #16 * 13]
+ stp x28, x29, [sp, #16 * 14]
+
+ /*
+ * This stores sp_el1 into ex_regs.sp so exception handlers can "look"
+ * at it. It will _not_ be used to restore the sp on return from the
+ * exception so handlers can not update it.
+ */
+ add x1, sp, #16 * 17
+ stp x30, x1, [sp, #16 * 15] /* x30, SP */
+
+ mrs x1, elr_el1
+ mrs x2, spsr_el1
+ stp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+.endm
+
+.macro restore_registers
+ ldp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+ msr elr_el1, x1
+ msr spsr_el1, x2
+
+ /* sp is not restored */
+ ldp x30, xzr, [sp, #16 * 15] /* x30, SP */
+
+ ldp x28, x29, [sp, #16 * 14]
+ ldp x26, x27, [sp, #16 * 13]
+ ldp x24, x25, [sp, #16 * 12]
+ ldp x22, x23, [sp, #16 * 11]
+ ldp x20, x21, [sp, #16 * 10]
+ ldp x18, x19, [sp, #16 * 9]
+ ldp x16, x17, [sp, #16 * 8]
+ ldp x14, x15, [sp, #16 * 7]
+ ldp x12, x13, [sp, #16 * 6]
+ ldp x10, x11, [sp, #16 * 5]
+ ldp x8, x9, [sp, #16 * 4]
+ ldp x6, x7, [sp, #16 * 3]
+ ldp x4, x5, [sp, #16 * 2]
+ ldp x2, x3, [sp, #16 * 1]
+ ldp x0, x1, [sp, #16 * 0]
+
+ add sp, sp, #16 * 17
+
+ eret
+.endm
+
+.pushsection ".entry.text", "ax"
+.balign 0x800
+.global vectors
+vectors:
+.popsection
+
+.set vector, 0
+
+/*
+ * Build an exception handler for vector and append a jump to it into
+ * vectors (while making sure that it's 0x80 aligned).
+ */
+.macro HANDLER, label
+handler_\label:
+ save_registers
+ mov x0, sp
+ mov x1, #vector
+ bl route_exception
+ restore_registers
+
+.pushsection ".entry.text", "ax"
+.balign 0x80
+ b handler_\label
+.popsection
+
+.set vector, vector + 1
+.endm
+
+.macro HANDLER_INVALID
+.pushsection ".entry.text", "ax"
+.balign 0x80
+/* This will abort so no need to save and restore registers. */
+ mov x0, #vector
+ mov x1, #0 /* ec */
+ mov x2, #0 /* valid_ec */
+ b kvm_exit_unexpected_exception
+.popsection
+
+.set vector, vector + 1
+.endm
+
+/*
+ * Caution: be sure to not add anything between the declaration of vectors
+ * above and these macro calls that will build the vectors table below it.
+ */
+ HANDLER_INVALID // Synchronous EL1t
+ HANDLER_INVALID // IRQ EL1t
+ HANDLER_INVALID // FIQ EL1t
+ HANDLER_INVALID // Error EL1t
+
+ HANDLER el1h_sync // Synchronous EL1h
+ HANDLER el1h_irq // IRQ EL1h
+ HANDLER el1h_fiq // FIQ EL1h
+ HANDLER el1h_error // Error EL1h
+
+ HANDLER el0_sync_64 // Synchronous 64-bit EL0
+ HANDLER el0_irq_64 // IRQ 64-bit EL0
+ HANDLER el0_fiq_64 // FIQ 64-bit EL0
+ HANDLER el0_error_64 // Error 64-bit EL0
+
+ HANDLER el0_sync_32 // Synchronous 32-bit EL0
+ HANDLER el0_irq_32 // IRQ 32-bit EL0
+ HANDLER el0_fiq_32 // FIQ 32-bit EL0
+ HANDLER el0_error_32 // Error 32-bit EL0
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index cee92d477dc0..632b74d6b3ca 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -6,14 +6,16 @@
*/
#include <linux/compiler.h>
+#include <assert.h>
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#include "processor.h"
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000
+static vm_vaddr_t exception_handlers;
+
static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
{
return (v + vm->page_size) & ~(vm->page_size - 1);
@@ -72,19 +74,19 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
return 1 << (vm->page_shift - 3);
}
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+void virt_pgd_alloc(struct kvm_vm *vm)
{
if (!vm->pgd_created) {
vm_paddr_t paddr = vm_phy_pages_alloc(vm,
page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
vm->pgd = paddr;
vm->pgd_created = true;
}
}
-void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot, uint64_t flags)
+static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t flags)
{
uint8_t attr_idx = flags & 7;
uint64_t *ptep;
@@ -104,25 +106,19 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
paddr, vm->max_gfn, vm->page_size);
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
+ if (!*ptep)
+ *ptep = vm_alloc_page_table(vm) | 3;
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
+ if (!*ptep)
+ *ptep = vm_alloc_page_table(vm) | 3;
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
+ if (!*ptep)
+ *ptep = vm_alloc_page_table(vm) | 3;
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
@@ -135,12 +131,11 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
*ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot)
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
- _virt_pg_map(vm, vaddr, paddr, pgd_memslot, attr_idx);
+ _virt_pg_map(vm, vaddr, paddr, attr_idx);
}
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
@@ -302,7 +297,7 @@ void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
DEFAULT_STACK_PGS * vm->page_size :
vm->page_size;
uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
- DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);
+ DEFAULT_ARM64_GUEST_STACK_VADDR_MIN);
vm_vcpu_add(vm, vcpuid);
aarch64_vcpu_setup(vm, vcpuid, init);
@@ -334,6 +329,100 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
va_end(ap);
}
+void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
+{
+ ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
+ while (1)
+ ;
+}
+
void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
{
+ struct ucall uc;
+
+ if (get_ucall(vm, vcpuid, &uc) != UCALL_UNHANDLED)
+ return;
+
+ if (uc.args[2]) /* valid_ec */ {
+ assert(VECTOR_IS_SYNC(uc.args[0]));
+ TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+ uc.args[0], uc.args[1]);
+ } else {
+ assert(!VECTOR_IS_SYNC(uc.args[0]));
+ TEST_FAIL("Unexpected exception (vector:0x%lx)",
+ uc.args[0]);
+ }
+}
+
+struct handlers {
+ handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM];
+};
+
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ extern char vectors;
+
+ set_reg(vm, vcpuid, ARM64_SYS_REG(VBAR_EL1), (uint64_t)&vectors);
+}
+
+void route_exception(struct ex_regs *regs, int vector)
+{
+ struct handlers *handlers = (struct handlers *)exception_handlers;
+ bool valid_ec;
+ int ec = 0;
+
+ switch (vector) {
+ case VECTOR_SYNC_CURRENT:
+ case VECTOR_SYNC_LOWER_64:
+ ec = (read_sysreg(esr_el1) >> ESR_EC_SHIFT) & ESR_EC_MASK;
+ valid_ec = true;
+ break;
+ case VECTOR_IRQ_CURRENT:
+ case VECTOR_IRQ_LOWER_64:
+ case VECTOR_FIQ_CURRENT:
+ case VECTOR_FIQ_LOWER_64:
+ case VECTOR_ERROR_CURRENT:
+ case VECTOR_ERROR_LOWER_64:
+ ec = 0;
+ valid_ec = false;
+ break;
+ default:
+ valid_ec = false;
+ goto unexpected_exception;
+ }
+
+ if (handlers && handlers->exception_handlers[vector][ec])
+ return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+ kvm_exit_unexpected_exception(vector, ec, valid_ec);
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers),
+ vm->page_size);
+
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
+ void (*handler)(struct ex_regs *))
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(VECTOR_IS_SYNC(vector));
+ assert(vector < VECTOR_NUM);
+ assert(ec < ESR_EC_NUM);
+ handlers->exception_handlers[vector][ec] = handler;
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(!VECTOR_IS_SYNC(vector));
+ assert(vector < VECTOR_NUM);
+ handlers->exception_handlers[vector][0] = handler;
}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index 2f37b90ee1a9..e0b0164e9af8 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -14,7 +14,7 @@ static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
return false;
- virt_pg_map(vm, gpa, gpa, 0);
+ virt_pg_map(vm, gpa, gpa);
ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
sync_global_to_guest(vm, ucall_exit_mmio_addr);
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index bc75a91e00a6..eac44f5d0db0 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -111,8 +111,7 @@ static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
* by the image and it needs to have sufficient available physical pages, to
* back the virtual pages used to load the image.
*/
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
- uint32_t data_memslot, uint32_t pgd_memslot)
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
{
off_t offset, offset_rv;
Elf64_Ehdr hdr;
@@ -164,8 +163,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
seg_vend |= vm->page_size - 1;
size_t seg_size = seg_vend - seg_vstart + 1;
- vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
- data_memslot, pgd_memslot);
+ vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart);
TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
"virtual memory for segment at requested min addr,\n"
" segment idx: %u\n"
diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c
index 25bff307c71f..c330f414ef96 100644
--- a/tools/testing/selftests/kvm/lib/guest_modes.c
+++ b/tools/testing/selftests/kvm/lib/guest_modes.c
@@ -22,6 +22,22 @@ void guest_modes_append_default(void)
}
}
#endif
+#ifdef __s390x__
+ {
+ int kvm_fd, vm_fd;
+ struct kvm_s390_vm_cpu_processor info;
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
+ kvm_device_access(vm_fd, KVM_S390_VM_CPU_MODEL,
+ KVM_S390_VM_CPU_PROCESSOR, &info, false);
+ close(vm_fd);
+ close(kvm_fd);
+ /* Starting with z13 we have 47bits of physical address */
+ if (info.ibc >= 0x30)
+ guest_mode_append(VM_MODE_P47V64_4K, true, true);
+ }
+#endif
}
void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg)
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index a2b732cf96ea..10a8ed691c66 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -176,6 +176,7 @@ const char *vm_guest_mode_string(uint32_t i)
[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
[VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
[VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
+ [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages",
};
_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
"Missing new mode strings?");
@@ -194,6 +195,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
{ 40, 48, 0x10000, 16 },
{ 0, 0, 0x1000, 12 },
{ 47, 64, 0x1000, 12 },
+ { 44, 64, 0x1000, 12 },
};
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
"Missing new mode params?");
@@ -282,6 +284,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
case VM_MODE_P47V64_4K:
vm->pgtable_levels = 5;
break;
+ case VM_MODE_P44V64_4K:
+ vm->pgtable_levels = 5;
+ break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
}
@@ -365,7 +370,7 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
pages = vm_adjust_num_guest_pages(mode, pages);
vm = vm_create(mode, pages, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ kvm_vm_elf_load(vm, program_invocation_name);
#ifdef __x86_64__
vm_create_irqchip(vm);
@@ -375,10 +380,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
uint32_t vcpuid = vcpuids ? vcpuids[i] : i;
vm_vcpu_add_default(vm, vcpuid, guest_code);
-
-#ifdef __x86_64__
- vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
-#endif
}
return vm;
@@ -1251,15 +1252,13 @@ va_found:
* a unique set of pages, with the minimum real allocation being at least
* a page.
*/
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- uint32_t data_memslot, uint32_t pgd_memslot)
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
{
uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
- virt_pgd_alloc(vm, pgd_memslot);
+ virt_pgd_alloc(vm);
vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
- KVM_UTIL_MIN_PFN * vm->page_size,
- data_memslot);
+ KVM_UTIL_MIN_PFN * vm->page_size, 0);
/*
* Find an unused range of virtual page addresses of at least
@@ -1271,7 +1270,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
pages--, vaddr += vm->page_size, paddr += vm->page_size) {
- virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+ virt_pg_map(vm, vaddr, paddr);
sparsebit_set(vm->vpages_mapped,
vaddr >> vm->page_shift);
@@ -1281,6 +1280,44 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
}
/*
+ * VM Virtual Address Allocate Pages
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least N system pages worth of bytes within the virtual address
+ * space of the vm.
+ */
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
+{
+ return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
+}
+
+/*
+ * VM Virtual Address Allocate Page
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least one system page worth of bytes within the virtual address
+ * space of the vm.
+ */
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
+{
+ return vm_vaddr_alloc_pages(vm, 1);
+}
+
+/*
* Map a range of VM virtual address to the VM's physical address
*
* Input Args:
@@ -1298,7 +1335,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
* @npages starting at @vaddr to the page range starting at @paddr.
*/
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages, uint32_t pgd_memslot)
+ unsigned int npages)
{
size_t page_size = vm->page_size;
size_t size = npages * page_size;
@@ -1307,7 +1344,7 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+ virt_pg_map(vm, vaddr, paddr);
vaddr += page_size;
paddr += page_size;
}
@@ -2177,6 +2214,14 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}
+/* Arbitrary minimum physical address used for virtual translation tables. */
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
+{
+ return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+}
+
/*
* Address Guest Virtual to Host Virtual
*
@@ -2286,3 +2331,15 @@ unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
return vm_adjust_num_guest_pages(mode, n);
}
+
+int vm_get_stats_fd(struct kvm_vm *vm)
+{
+ return ioctl(vm->fd, KVM_GET_STATS_FD, NULL);
+}
+
+int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+ return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL);
+}
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 7397ca299835..0ef80dbdc116 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -50,11 +50,12 @@ static void guest_code(uint32_t vcpu_id)
}
struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
- uint64_t vcpu_memory_bytes,
+ uint64_t vcpu_memory_bytes, int slots,
enum vm_mem_backing_src_type backing_src)
{
struct kvm_vm *vm;
uint64_t guest_num_pages;
+ int i;
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
@@ -68,6 +69,9 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
"Guest memory size is not host page size aligned.");
TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
"Guest memory size is not guest page size aligned.");
+ TEST_ASSERT(guest_num_pages % slots == 0,
+ "Guest memory cannot be evenly divided into %d slots.",
+ slots);
vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,
(vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size,
@@ -95,13 +99,19 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
#endif
pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
- /* Add an extra memory slot for testing */
- vm_userspace_mem_region_add(vm, backing_src, guest_test_phys_mem,
- PERF_TEST_MEM_SLOT_INDEX,
- guest_num_pages, 0);
+ /* Add extra memory slots for testing */
+ for (i = 0; i < slots; i++) {
+ uint64_t region_pages = guest_num_pages / slots;
+ vm_paddr_t region_start = guest_test_phys_mem +
+ region_pages * perf_test_args.guest_page_size * i;
+
+ vm_userspace_mem_region_add(vm, backing_src, region_start,
+ PERF_TEST_MEM_SLOT_INDEX + i,
+ region_pages, 0);
+ }
/* Do mapping for the demand paging memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
ucall_init(vm, NULL);
@@ -140,6 +150,8 @@ void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus,
vcpu_gpa = guest_test_phys_mem;
}
+ vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+
pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
vcpu_id, vcpu_gpa, vcpu_gpa +
(vcpu_args->pages * perf_test_args.guest_page_size));
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index 0152f356c099..f87c7137598e 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -9,11 +9,9 @@
#include "kvm_util.h"
#include "../kvm_util_internal.h"
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-
#define PAGES_PER_REGION 4
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
+void virt_pgd_alloc(struct kvm_vm *vm)
{
vm_paddr_t paddr;
@@ -24,7 +22,7 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
return;
paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
vm->pgd = paddr;
@@ -36,12 +34,12 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
* a page table (ri == 4). Returns a suitable region/segment table entry
* which points to the freshly allocated pages.
*/
-static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
+static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
{
uint64_t taddr;
taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size);
return (taddr & REGION_ENTRY_ORIGIN)
@@ -49,8 +47,7 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
| ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
- uint32_t memslot)
+void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
{
int ri, idx;
uint64_t *entry;
@@ -77,7 +74,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
for (ri = 1; ri <= 4; ri++) {
idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
if (entry[idx] & REGION_ENTRY_INVALID)
- entry[idx] = virt_alloc_region(vm, ri, memslot);
+ entry[idx] = virt_alloc_region(vm, ri);
entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
}
@@ -170,7 +167,7 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
vm->page_size);
stack_vaddr = vm_vaddr_alloc(vm, stack_size,
- DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+ DEFAULT_GUEST_STACK_VADDR_MIN);
vm_vcpu_add(vm, vcpuid);
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index af1031fed97f..b72429108993 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -11,6 +11,7 @@
#include <stdlib.h>
#include <time.h>
#include <sys/stat.h>
+#include <sys/syscall.h>
#include <linux/mman.h>
#include "linux/kernel.h"
@@ -129,13 +130,16 @@ size_t get_trans_hugepagesz(void)
{
size_t size;
FILE *f;
+ int ret;
TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");
TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size");
- fscanf(f, "%ld", &size);
+ ret = fscanf(f, "%ld", &size);
+ ret = fscanf(f, "%ld", &size);
+ TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size");
fclose(f);
return size;
@@ -279,13 +283,22 @@ size_t get_backing_src_pagesz(uint32_t i)
}
}
-void backing_src_help(void)
+static void print_available_backing_src_types(const char *prefix)
{
int i;
- printf("Available backing src types:\n");
+ printf("%sAvailable backing src types:\n", prefix);
+
for (i = 0; i < NUM_SRC_TYPES; i++)
- printf("\t%s\n", vm_mem_backing_src_alias(i)->name);
+ printf("%s %s\n", prefix, vm_mem_backing_src_alias(i)->name);
+}
+
+void backing_src_help(const char *flag)
+{
+ printf(" %s: specify the type of memory that should be used to\n"
+ " back the guest data region. (default: %s)\n",
+ flag, vm_mem_backing_src_alias(DEFAULT_VM_MEM_SRC)->name);
+ print_available_backing_src_types(" ");
}
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
@@ -296,7 +309,23 @@ enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
if (!strcmp(type_name, vm_mem_backing_src_alias(i)->name))
return i;
- backing_src_help();
+ print_available_backing_src_types("");
TEST_FAIL("Unknown backing src type: %s", type_name);
return -1;
}
+
+long get_run_delay(void)
+{
+ char path[64];
+ long val[2];
+ FILE *fp;
+
+ sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
+ fp = fopen(path, "r");
+ /* Return MIN_RUN_DELAY_NS upon failure just to be safe */
+ if (fscanf(fp, "%ld %ld ", &val[0], &val[1]) < 2)
+ val[1] = MIN_RUN_DELAY_NS;
+ fclose(fp);
+
+ return val[1];
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c
new file mode 100644
index 000000000000..7168e25c194e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/x86_64/processor.c
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#include "apic.h"
+
+void apic_disable(void)
+{
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) &
+ ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
+}
+
+void xapic_enable(void)
+{
+ uint64_t val = rdmsr(MSR_IA32_APICBASE);
+
+ /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
+ if (val & MSR_IA32_APICBASE_EXTD) {
+ apic_disable();
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
+ } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
+ wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
+ }
+
+ /*
+ * Per SDM: reset value of spurious interrupt vector register has the
+ * APIC software enabled bit=0. It must be enabled in addition to the
+ * enable bit in the MSR.
+ */
+ val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
+ xapic_write_reg(APIC_SPIV, val);
+}
+
+void x2apic_enable(void)
+{
+ wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
+ MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
+ x2apic_write_reg(APIC_SPIV,
+ x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index efe235044421..28cb881f440d 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -17,13 +17,10 @@
#define DEFAULT_CODE_SELECTOR 0x8
#define DEFAULT_DATA_SELECTOR 0x10
-/* Minimum physical address used for virtual translation tables. */
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-
vm_vaddr_t exception_handlers;
/* Virtual translation table structure declarations */
-struct pageMapL4Entry {
+struct pageUpperEntry {
uint64_t present:1;
uint64_t writable:1;
uint64_t user:1;
@@ -33,37 +30,7 @@ struct pageMapL4Entry {
uint64_t ignored_06:1;
uint64_t page_size:1;
uint64_t ignored_11_08:4;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageDirectoryPointerEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageDirectoryEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t address:40;
+ uint64_t pfn:40;
uint64_t ignored_62_52:11;
uint64_t execute_disable:1;
};
@@ -79,7 +46,7 @@ struct pageTableEntry {
uint64_t reserved_07:1;
uint64_t global:1;
uint64_t ignored_11_09:3;
- uint64_t address:40;
+ uint64_t pfn:40;
uint64_t ignored_62_52:11;
uint64_t execute_disable:1;
};
@@ -207,96 +174,211 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
}
}
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+void virt_pgd_alloc(struct kvm_vm *vm)
{
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
/* If needed, create page map l4 table. */
if (!vm->pgd_created) {
- vm_paddr_t paddr = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- vm->pgd = paddr;
+ vm->pgd = vm_alloc_page_table(vm);
vm->pgd_created = true;
}
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot)
+static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
+ int level)
+{
+ uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift);
+ int index = vaddr >> (vm->page_shift + level * 9) & 0x1ffu;
+
+ return &page_table[index];
+}
+
+static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
+ uint64_t pt_pfn,
+ uint64_t vaddr,
+ uint64_t paddr,
+ int level,
+ enum x86_page_size page_size)
+{
+ struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+
+ if (!pte->present) {
+ pte->writable = true;
+ pte->present = true;
+ pte->page_size = (level == page_size);
+ if (pte->page_size)
+ pte->pfn = paddr >> vm->page_shift;
+ else
+ pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
+ } else {
+ /*
+ * Entry already present. Assert that the caller doesn't want
+ * a hugepage at this level, and that there isn't a hugepage at
+ * this level.
+ */
+ TEST_ASSERT(level != page_size,
+ "Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
+ page_size, vaddr);
+ TEST_ASSERT(!pte->page_size,
+ "Cannot create page table at level: %u, vaddr: 0x%lx\n",
+ level, vaddr);
+ }
+ return pte;
+}
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ enum x86_page_size page_size)
+{
+ const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
+ struct pageUpperEntry *pml4e, *pdpe, *pde;
+ struct pageTableEntry *pte;
+
+ TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
+ "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ TEST_ASSERT((vaddr % pg_size) == 0,
+ "Virtual address not aligned,\n"
+ "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % pg_size) == 0,
+ "Physical address not aligned,\n"
+ " paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ /*
+ * Allocate upper level page tables, if not already present. Return
+ * early if a hugepage was created.
+ */
+ pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
+ vaddr, paddr, 3, page_size);
+ if (pml4e->page_size)
+ return;
+
+ pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size);
+ if (pdpe->page_size)
+ return;
+
+ pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size);
+ if (pde->page_size)
+ return;
+
+ /* Fill in page table entry. */
+ pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
+ TEST_ASSERT(!pte->present,
+ "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
+ pte->pfn = paddr >> vm->page_shift;
+ pte->writable = true;
+ pte->present = 1;
+}
+
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ __virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
+}
+
+static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
+ uint64_t vaddr)
{
uint16_t index[4];
- struct pageMapL4Entry *pml4e;
+ struct pageUpperEntry *pml4e, *pdpe, *pde;
+ struct pageTableEntry *pte;
+ struct kvm_cpuid_entry2 *entry;
+ struct kvm_sregs sregs;
+ int max_phy_addr;
+ /* Set the bottom 52 bits. */
+ uint64_t rsvd_mask = 0x000fffffffffffff;
+
+ entry = kvm_get_supported_cpuid_index(0x80000008, 0);
+ max_phy_addr = entry->eax & 0x000000ff;
+ /* Clear the bottom bits of the reserved mask. */
+ rsvd_mask = (rsvd_mask >> max_phy_addr) << max_phy_addr;
+
+ /*
+ * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries
+ * with 4-Level Paging and 5-Level Paging".
+ * If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1,
+ * the XD flag (bit 63) is reserved.
+ */
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+ if ((sregs.efer & EFER_NX) == 0) {
+ rsvd_mask |= (1ull << 63);
+ }
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
- TEST_ASSERT((vaddr % vm->page_size) == 0,
- "Virtual address not on page boundary,\n"
- " vaddr: 0x%lx vm->page_size: 0x%x",
- vaddr, vm->page_size);
TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
(vaddr >> vm->page_shift)),
"Invalid virtual address, vaddr: 0x%lx",
vaddr);
- TEST_ASSERT((paddr % vm->page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x",
- paddr, vm->page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
+ /*
+ * Based on the mode check above there are 48 bits in the vaddr, so
+ * shift 16 to sign extend the last bit (bit-47),
+ */
+ TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
+ "Canonical check failed. The virtual address is invalid.");
index[0] = (vaddr >> 12) & 0x1ffu;
index[1] = (vaddr >> 21) & 0x1ffu;
index[2] = (vaddr >> 30) & 0x1ffu;
index[3] = (vaddr >> 39) & 0x1ffu;
- /* Allocate page directory pointer table if not present. */
pml4e = addr_gpa2hva(vm, vm->pgd);
- if (!pml4e[index[3]].present) {
- pml4e[index[3]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pml4e[index[3]].writable = true;
- pml4e[index[3]].present = true;
- }
+ TEST_ASSERT(pml4e[index[3]].present,
+ "Expected pml4e to be present for gva: 0x%08lx", vaddr);
+ TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) &
+ (rsvd_mask | (1ull << 7))) == 0,
+ "Unexpected reserved bits set.");
+
+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
+ TEST_ASSERT(pdpe[index[2]].present,
+ "Expected pdpe to be present for gva: 0x%08lx", vaddr);
+ TEST_ASSERT(pdpe[index[2]].page_size == 0,
+ "Expected pdpe to map a pde not a 1-GByte page.");
+ TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
+ "Unexpected reserved bits set.");
+
+ pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
+ TEST_ASSERT(pde[index[1]].present,
+ "Expected pde to be present for gva: 0x%08lx", vaddr);
+ TEST_ASSERT(pde[index[1]].page_size == 0,
+ "Expected pde to map a pte not a 2-MByte page.");
+ TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
+ "Unexpected reserved bits set.");
+
+ pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
+ TEST_ASSERT(pte[index[0]].present,
+ "Expected pte to be present for gva: 0x%08lx", vaddr);
+
+ return &pte[index[0]];
+}
- /* Allocate page directory table if not present. */
- struct pageDirectoryPointerEntry *pdpe;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
- if (!pdpe[index[2]].present) {
- pdpe[index[2]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pdpe[index[2]].writable = true;
- pdpe[index[2]].present = true;
- }
+uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
+{
+ struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
- /* Allocate page table if not present. */
- struct pageDirectoryEntry *pde;
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
- if (!pde[index[1]].present) {
- pde[index[1]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pde[index[1]].writable = true;
- pde[index[1]].present = true;
- }
+ return *(uint64_t *)pte;
+}
- /* Fill in page table entry. */
- struct pageTableEntry *pte;
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
- pte[index[0]].address = paddr >> vm->page_shift;
- pte[index[0]].writable = true;
- pte[index[0]].present = 1;
+void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
+ uint64_t pte)
+{
+ struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid,
+ vaddr);
+
+ *(uint64_t *)new_pte = pte;
}
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
- struct pageMapL4Entry *pml4e, *pml4e_start;
- struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
- struct pageDirectoryEntry *pde, *pde_start;
+ struct pageUpperEntry *pml4e, *pml4e_start;
+ struct pageUpperEntry *pdpe, *pdpe_start;
+ struct pageUpperEntry *pde, *pde_start;
struct pageTableEntry *pte, *pte_start;
if (!vm->pgd_created)
@@ -307,8 +389,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*s index hvaddr gpaddr "
"addr w exec dirty\n",
indent, "");
- pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
- vm->pgd);
+ pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd);
for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
pml4e = &pml4e_start[n1];
if (!pml4e->present)
@@ -317,11 +398,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
" %u\n",
indent, "",
pml4e - pml4e_start, pml4e,
- addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
+ addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn,
pml4e->writable, pml4e->execute_disable);
- pdpe_start = addr_gpa2hva(vm, pml4e->address
- * vm->page_size);
+ pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size);
for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
pdpe = &pdpe_start[n2];
if (!pdpe->present)
@@ -331,11 +411,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
indent, "",
pdpe - pdpe_start, pdpe,
addr_hva2gpa(vm, pdpe),
- (uint64_t) pdpe->address, pdpe->writable,
+ (uint64_t) pdpe->pfn, pdpe->writable,
pdpe->execute_disable);
- pde_start = addr_gpa2hva(vm,
- pdpe->address * vm->page_size);
+ pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size);
for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
pde = &pde_start[n3];
if (!pde->present)
@@ -344,11 +423,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
"0x%-12lx 0x%-10lx %u %u\n",
indent, "", pde - pde_start, pde,
addr_hva2gpa(vm, pde),
- (uint64_t) pde->address, pde->writable,
+ (uint64_t) pde->pfn, pde->writable,
pde->execute_disable);
- pte_start = addr_gpa2hva(vm,
- pde->address * vm->page_size);
+ pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size);
for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
pte = &pte_start[n4];
if (!pte->present)
@@ -359,7 +437,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
indent, "",
pte - pte_start, pte,
addr_hva2gpa(vm, pte),
- (uint64_t) pte->address,
+ (uint64_t) pte->pfn,
pte->writable,
pte->execute_disable,
pte->dirty,
@@ -480,9 +558,7 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint16_t index[4];
- struct pageMapL4Entry *pml4e;
- struct pageDirectoryPointerEntry *pdpe;
- struct pageDirectoryEntry *pde;
+ struct pageUpperEntry *pml4e, *pdpe, *pde;
struct pageTableEntry *pte;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
@@ -499,43 +575,39 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
if (!pml4e[index[3]].present)
goto unmapped_gva;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
if (!pdpe[index[2]].present)
goto unmapped_gva;
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+ pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
if (!pde[index[1]].present)
goto unmapped_gva;
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+ pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
if (!pte[index[0]].present)
goto unmapped_gva;
- return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
+ return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
unmapped_gva:
TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
exit(EXIT_FAILURE);
}
-static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
- int pgd_memslot)
+static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
{
if (!vm->gdt)
- vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
- KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+ vm->gdt = vm_vaddr_alloc_page(vm);
dt->base = vm->gdt;
dt->limit = getpagesize();
}
static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
- int selector, int gdt_memslot,
- int pgd_memslot)
+ int selector)
{
if (!vm->tss)
- vm->tss = vm_vaddr_alloc(vm, getpagesize(),
- KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+ vm->tss = vm_vaddr_alloc_page(vm);
memset(segp, 0, sizeof(*segp));
segp->base = vm->tss;
@@ -546,7 +618,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
kvm_seg_fill_gdt_64bit(vm, segp);
}
-static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
+static void vcpu_setup(struct kvm_vm *vm, int vcpuid)
{
struct kvm_sregs sregs;
@@ -555,7 +627,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
sregs.idt.limit = 0;
- kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
+ kvm_setup_gdt(vm, &sregs.gdt);
switch (vm->mode) {
case VM_MODE_PXXV48_4K:
@@ -567,7 +639,7 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
- kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
+ kvm_setup_tss_64bit(vm, &sregs.tr, 0x18);
break;
default:
@@ -584,11 +656,11 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
struct kvm_regs regs;
vm_vaddr_t stack_vaddr;
stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
- DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+ DEFAULT_GUEST_STACK_VADDR_MIN);
/* Create VCPU */
vm_vcpu_add(vm, vcpuid);
- vcpu_setup(vm, vcpuid, 0, 0);
+ vcpu_setup(vm, vcpuid);
/* Setup guest general purpose registers */
vcpu_regs_get(vm, vcpuid, &regs);
@@ -600,6 +672,9 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
/* Setup the MP state */
mp_state.mp_state = 0;
vcpu_set_mp_state(vm, vcpuid, &mp_state);
+
+ /* Setup supported CPUIDs */
+ vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
}
/*
@@ -1201,7 +1276,7 @@ static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
void kvm_exit_unexpected_vector(uint32_t value)
{
- outl(UNEXPECTED_VECTOR_PORT, value);
+ ucall(UCALL_UNHANDLED, 1, value);
}
void route_exception(struct ex_regs *regs)
@@ -1222,8 +1297,8 @@ void vm_init_descriptor_tables(struct kvm_vm *vm)
extern void *idt_handlers;
int i;
- vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0);
- vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0);
+ vm->idt = vm_vaddr_alloc_page(vm);
+ vm->handlers = vm_vaddr_alloc_page(vm);
/* Handlers have the same address in both address spaces.*/
for (i = 0; i < NUM_INTERRUPTS; i++)
set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
@@ -1244,8 +1319,8 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
}
-void vm_handle_exception(struct kvm_vm *vm, int vector,
- void (*handler)(struct ex_regs *))
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
{
vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
@@ -1254,16 +1329,13 @@ void vm_handle_exception(struct kvm_vm *vm, int vector,
void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
{
- if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO
- && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT
- && vcpu_state(vm, vcpuid)->io.size == 4) {
- /* Grab pointer to io data */
- uint32_t *data = (void *)vcpu_state(vm, vcpuid)
- + vcpu_state(vm, vcpuid)->io.data_offset;
-
- TEST_ASSERT(false,
- "Unexpected vectored event in guest (vector:0x%x)",
- *data);
+ struct ucall uc;
+
+ if (get_ucall(vm, vcpuid, &uc) == UCALL_UNHANDLED) {
+ uint64_t vector = uc.args[0];
+
+ TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)",
+ vector);
}
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 827fe6028dd4..2ac98d70d02b 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -30,17 +30,14 @@ u64 rflags;
struct svm_test_data *
vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
{
- vm_vaddr_t svm_gva = vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm);
struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
- svm->vmcb = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ svm->vmcb = (void *)vm_vaddr_alloc_page(vm);
svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
- svm->save_area = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ svm->save_area = (void *)vm_vaddr_alloc_page(vm);
svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 2448b30e8efa..d089d8b850b5 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -77,50 +77,48 @@ int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id)
struct vmx_pages *
vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
{
- vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm);
struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
/* Setup of a region of guest memory for the vmxon region. */
- vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmxon = (void *)vm_vaddr_alloc_page(vm);
vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
/* Setup of a region of guest memory for a vmcs. */
- vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmcs = (void *)vm_vaddr_alloc_page(vm);
vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
/* Setup of a region of guest memory for the MSR bitmap. */
- vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->msr = (void *)vm_vaddr_alloc_page(vm);
vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
memset(vmx->msr_hva, 0, getpagesize());
/* Setup of a region of guest memory for the shadow VMCS. */
- vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm);
vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
/* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
- vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmread = (void *)vm_vaddr_alloc_page(vm);
vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
memset(vmx->vmread_hva, 0, getpagesize());
- vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm);
vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
memset(vmx->vmwrite_hva, 0, getpagesize());
/* Setup of a region of guest memory for the VP Assist page. */
- vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vmx->vp_assist = (void *)vm_vaddr_alloc_page(vm);
vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist);
vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist);
/* Setup of a region of guest memory for the enlightened VMCS. */
- vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vmx->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
vmx->enlightened_vmcs_hva =
addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs);
vmx->enlightened_vmcs_gpa =
@@ -395,7 +393,7 @@ void nested_vmx_check_supported(void)
}
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot)
+ uint64_t nested_paddr, uint64_t paddr)
{
uint16_t index[4];
struct eptPageTableEntry *pml4e;
@@ -428,9 +426,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
/* Allocate page directory pointer table if not present. */
pml4e = vmx->eptp_hva;
if (!pml4e[index[3]].readable) {
- pml4e[index[3]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
+ pml4e[index[3]].address = vm_alloc_page_table(vm) >> vm->page_shift;
pml4e[index[3]].writable = true;
pml4e[index[3]].readable = true;
pml4e[index[3]].executable = true;
@@ -440,9 +436,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
struct eptPageTableEntry *pdpe;
pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
if (!pdpe[index[2]].readable) {
- pdpe[index[2]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
+ pdpe[index[2]].address = vm_alloc_page_table(vm) >> vm->page_shift;
pdpe[index[2]].writable = true;
pdpe[index[2]].readable = true;
pdpe[index[2]].executable = true;
@@ -452,9 +446,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
struct eptPageTableEntry *pde;
pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
if (!pde[index[1]].readable) {
- pde[index[1]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
+ pde[index[1]].address = vm_alloc_page_table(vm) >> vm->page_shift;
pde[index[1]].writable = true;
pde[index[1]].readable = true;
pde[index[1]].executable = true;
@@ -494,8 +486,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* page range starting at nested_paddr to the page range starting at paddr.
*/
void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size,
- uint32_t eptp_memslot)
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size)
{
size_t page_size = vm->page_size;
size_t npages = size / page_size;
@@ -504,7 +495,7 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot);
+ nested_pg_map(vmx, vm, nested_paddr, paddr);
nested_paddr += page_size;
paddr += page_size;
}
@@ -514,7 +505,7 @@ void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* physical pages in VM.
*/
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot, uint32_t eptp_memslot)
+ uint32_t memslot)
{
sparsebit_idx_t i, last;
struct userspace_mem_region *region =
@@ -530,24 +521,21 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
nested_map(vmx, vm,
(uint64_t)i << vm->page_shift,
(uint64_t)i << vm->page_shift,
- 1 << vm->page_shift,
- eptp_memslot);
+ 1 << vm->page_shift);
}
}
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot)
{
- vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
}
-void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t eptp_memslot)
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
{
- vmx->apic_access = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
}
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 98351ba0933c..4cfcafea9f5a 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -45,7 +45,6 @@ static void *vcpu_worker(void *data)
struct kvm_vm *vm = perf_test_args.vm;
struct kvm_run *run;
- vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
run = vcpu_state(vm, vcpu_id);
/* Let the guest access its memory until a stop signal is received */
@@ -105,7 +104,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct kvm_vm *vm;
int vcpu_id;
- vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
+ vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
VM_MEM_SRC_ANONYMOUS);
perf_test_args.wr_fract = 1;
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 11239652d805..d6e381e01db7 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -306,7 +306,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
guest_addr += npages * 4096;
}
- virt_map(data->vm, MEM_GPA, MEM_GPA, mempages, 0);
+ virt_map(data->vm, MEM_GPA, MEM_GPA, mempages);
sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
atomic_init(&sync->start_flag, false);
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
new file mode 100644
index 000000000000..4158da0da2bb
--- /dev/null
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <syscall.h>
+#include <sys/ioctl.h>
+#include <sys/sysinfo.h>
+#include <asm/barrier.h>
+#include <linux/atomic.h>
+#include <linux/rseq.h>
+#include <linux/unistd.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define VCPU_ID 0
+
+static __thread volatile struct rseq __rseq = {
+ .cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
+};
+
+/*
+ * Use an arbitrary, bogus signature for configuring rseq, this test does not
+ * actually enter an rseq critical section.
+ */
+#define RSEQ_SIG 0xdeadbeef
+
+/*
+ * Any bug related to task migration is likely to be timing-dependent; perform
+ * a large number of migrations to reduce the odds of a false negative.
+ */
+#define NR_TASK_MIGRATIONS 100000
+
+static pthread_t migration_thread;
+static cpu_set_t possible_mask;
+static int min_cpu, max_cpu;
+static bool done;
+
+static atomic_t seq_cnt;
+
+static void guest_code(void)
+{
+ for (;;)
+ GUEST_SYNC(0);
+}
+
+static void sys_rseq(int flags)
+{
+ int r;
+
+ r = syscall(__NR_rseq, &__rseq, sizeof(__rseq), flags, RSEQ_SIG);
+ TEST_ASSERT(!r, "rseq failed, errno = %d (%s)", errno, strerror(errno));
+}
+
+static int next_cpu(int cpu)
+{
+ /*
+ * Advance to the next CPU, skipping those that weren't in the original
+ * affinity set. Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's
+ * data storage is considered as opaque. Note, if this task is pinned
+ * to a small set of discontigous CPUs, e.g. 2 and 1023, this loop will
+ * burn a lot cycles and the test will take longer than normal to
+ * complete.
+ */
+ do {
+ cpu++;
+ if (cpu > max_cpu) {
+ cpu = min_cpu;
+ TEST_ASSERT(CPU_ISSET(cpu, &possible_mask),
+ "Min CPU = %d must always be usable", cpu);
+ break;
+ }
+ } while (!CPU_ISSET(cpu, &possible_mask));
+
+ return cpu;
+}
+
+static void *migration_worker(void *ign)
+{
+ cpu_set_t allowed_mask;
+ int r, i, cpu;
+
+ CPU_ZERO(&allowed_mask);
+
+ for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) {
+ CPU_SET(cpu, &allowed_mask);
+
+ /*
+ * Bump the sequence count twice to allow the reader to detect
+ * that a migration may have occurred in between rseq and sched
+ * CPU ID reads. An odd sequence count indicates a migration
+ * is in-progress, while a completely different count indicates
+ * a migration occurred since the count was last read.
+ */
+ atomic_inc(&seq_cnt);
+
+ /*
+ * Ensure the odd count is visible while sched_getcpu() isn't
+ * stable, i.e. while changing affinity is in-progress.
+ */
+ smp_wmb();
+ r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
+ TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
+ errno, strerror(errno));
+ smp_wmb();
+ atomic_inc(&seq_cnt);
+
+ CPU_CLR(cpu, &allowed_mask);
+
+ /*
+ * Wait 1-10us before proceeding to the next iteration and more
+ * specifically, before bumping seq_cnt again. A delay is
+ * needed on three fronts:
+ *
+ * 1. To allow sched_setaffinity() to prompt migration before
+ * ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME
+ * (or TIF_NEED_RESCHED, which indirectly leads to handling
+ * NOTIFY_RESUME) is handled in KVM context.
+ *
+ * If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters
+ * the guest, the guest will trigger a IO/MMIO exit all the
+ * way to userspace and the TIF flags will be handled by
+ * the generic "exit to userspace" logic, not by KVM. The
+ * exit to userspace is necessary to give the test a chance
+ * to check the rseq CPU ID (see #2).
+ *
+ * Alternatively, guest_code() could include an instruction
+ * to trigger an exit that is handled by KVM, but any such
+ * exit requires architecture specific code.
+ *
+ * 2. To let ioctl(KVM_RUN) make its way back to the test
+ * before the next round of migration. The test's check on
+ * the rseq CPU ID must wait for migration to complete in
+ * order to avoid false positive, thus any kernel rseq bug
+ * will be missed if the next migration starts before the
+ * check completes.
+ *
+ * 3. To ensure the read-side makes efficient forward progress,
+ * e.g. if sched_getcpu() involves a syscall. Stalling the
+ * read-side means the test will spend more time waiting for
+ * sched_getcpu() to stabilize and less time trying to hit
+ * the timing-dependent bug.
+ *
+ * Because any bug in this area is likely to be timing-dependent,
+ * run with a range of delays at 1us intervals from 1us to 10us
+ * as a best effort to avoid tuning the test to the point where
+ * it can hit _only_ the original bug and not detect future
+ * regressions.
+ *
+ * The original bug can reproduce with a delay up to ~500us on
+ * x86-64, but starts to require more iterations to reproduce
+ * as the delay creeps above ~10us, and the average runtime of
+ * each iteration obviously increases as well. Cap the delay
+ * at 10us to keep test runtime reasonable while minimizing
+ * potential coverage loss.
+ *
+ * The lower bound for reproducing the bug is likely below 1us,
+ * e.g. failures occur on x86-64 with nanosleep(0), but at that
+ * point the overhead of the syscall likely dominates the delay.
+ * Use usleep() for simplicity and to avoid unnecessary kernel
+ * dependencies.
+ */
+ usleep((i % 10) + 1);
+ }
+ done = true;
+ return NULL;
+}
+
+static int calc_min_max_cpu(void)
+{
+ int i, cnt, nproc;
+
+ if (CPU_COUNT(&possible_mask) < 2)
+ return -EINVAL;
+
+ /*
+ * CPU_SET doesn't provide a FOR_EACH helper, get the min/max CPU that
+ * this task is affined to in order to reduce the time spent querying
+ * unusable CPUs, e.g. if this task is pinned to a small percentage of
+ * total CPUs.
+ */
+ nproc = get_nprocs_conf();
+ min_cpu = -1;
+ max_cpu = -1;
+ cnt = 0;
+
+ for (i = 0; i < nproc; i++) {
+ if (!CPU_ISSET(i, &possible_mask))
+ continue;
+ if (min_cpu == -1)
+ min_cpu = i;
+ max_cpu = i;
+ cnt++;
+ }
+
+ return (cnt < 2) ? -EINVAL : 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int r, i, snapshot;
+ struct kvm_vm *vm;
+ u32 cpu, rseq_cpu;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
+ TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
+ strerror(errno));
+
+ if (calc_min_max_cpu()) {
+ print_skip("Only one usable CPU, task migration not possible");
+ exit(KSFT_SKIP);
+ }
+
+ sys_rseq(0);
+
+ /*
+ * Create and run a dummy VM that immediately exits to userspace via
+ * GUEST_SYNC, while concurrently migrating the process by setting its
+ * CPU affinity.
+ */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ ucall_init(vm, NULL);
+
+ pthread_create(&migration_thread, NULL, migration_worker, 0);
+
+ for (i = 0; !done; i++) {
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
+ "Guest failed?");
+
+ /*
+ * Verify rseq's CPU matches sched's CPU. Ensure migration
+ * doesn't occur between sched_getcpu() and reading the rseq
+ * cpu_id by rereading both if the sequence count changes, or
+ * if the count is odd (migration in-progress).
+ */
+ do {
+ /*
+ * Drop bit 0 to force a mismatch if the count is odd,
+ * i.e. if a migration is in-progress.
+ */
+ snapshot = atomic_read(&seq_cnt) & ~1;
+
+ /*
+ * Ensure reading sched_getcpu() and rseq.cpu_id
+ * complete in a single "no migration" window, i.e. are
+ * not reordered across the seq_cnt reads.
+ */
+ smp_rmb();
+ cpu = sched_getcpu();
+ rseq_cpu = READ_ONCE(__rseq.cpu_id);
+ smp_rmb();
+ } while (snapshot != atomic_read(&seq_cnt));
+
+ TEST_ASSERT(rseq_cpu == cpu,
+ "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu);
+ }
+
+ /*
+ * Sanity check that the test was able to enter the guest a reasonable
+ * number of times, e.g. didn't get stalled too often/long waiting for
+ * sched_getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a
+ * fairly conservative ratio on x86-64, which can do _more_ KVM_RUNs
+ * than migrations given the 1us+ delay in the migration task.
+ */
+ TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2),
+ "Only performed %d KVM_RUNs, task stalled too much?\n", i);
+
+ pthread_join(migration_thread, NULL);
+
+ kvm_vm_free(vm);
+
+ sys_rseq(RSEQ_FLAG_UNREGISTER);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index d8812f27648c..72a1c9b4882c 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -132,7 +132,7 @@ static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
- virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
+ virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2);
/* Ditto for the host mapping so that both pages can be zeroed. */
hva = addr_gpa2hva(vm, MEM_REGION_GPA);
@@ -377,7 +377,8 @@ static void test_add_max_memory_regions(void)
(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
- PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index fcc840088c91..62f2eb9ee3d5 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -10,7 +10,6 @@
#include <sched.h>
#include <pthread.h>
#include <linux/kernel.h>
-#include <sys/syscall.h>
#include <asm/kvm.h>
#include <asm/kvm_para.h>
@@ -20,7 +19,6 @@
#define NR_VCPUS 4
#define ST_GPA_BASE (1 << 30)
-#define MIN_RUN_DELAY_NS 200000UL
static void *st_gva[NR_VCPUS];
static uint64_t guest_stolen_time[NR_VCPUS];
@@ -73,8 +71,6 @@ static void steal_time_init(struct kvm_vm *vm)
for (i = 0; i < NR_VCPUS; ++i) {
int ret;
- vcpu_set_cpuid(vm, i, kvm_get_supported_cpuid());
-
/* ST_GPA_BASE is identity mapped */
st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
sync_global_to_guest(vm, st_gva[i]);
@@ -120,12 +116,12 @@ struct st_time {
uint64_t st_time;
};
-static int64_t smccc(uint32_t func, uint32_t arg)
+static int64_t smccc(uint32_t func, uint64_t arg)
{
unsigned long ret;
asm volatile(
- "mov x0, %1\n"
+ "mov w0, %w1\n"
"mov x1, %2\n"
"hvc #0\n"
"mov %0, x0\n"
@@ -219,20 +215,6 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid)
#endif
-static long get_run_delay(void)
-{
- char path[64];
- long val[2];
- FILE *fp;
-
- sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
- fp = fopen(path, "r");
- fscanf(fp, "%ld %ld ", &val[0], &val[1]);
- fclose(fp);
-
- return val[1];
-}
-
static void *do_steal_time(void *arg)
{
struct timespec ts, stop;
@@ -295,7 +277,7 @@ int main(int ac, char **av)
vm = vm_create_default(0, 0, guest_code);
gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
- virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages, 0);
+ virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages);
ucall_init(vm, NULL);
/* Add the rest of the VCPUs */
@@ -322,7 +304,7 @@ int main(int ac, char **av)
run_delay = get_run_delay();
pthread_create(&thread, &attr, do_steal_time, NULL);
do
- pthread_yield();
+ sched_yield();
while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS);
pthread_join(thread, NULL);
run_delay = get_run_delay() - run_delay;
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
index 6097a8283377..5f078db1bcba 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -8,12 +8,15 @@
#include <string.h>
#include "kvm_util.h"
#include "processor.h"
+#include "apic.h"
#define VCPU_ID 0
#define DR6_BD (1 << 13)
#define DR7_GD (1 << 13)
+#define IRQ_VECTOR 0xAA
+
/* For testing data access debug BP */
uint32_t guest_value;
@@ -21,6 +24,11 @@ extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
static void guest_code(void)
{
+ /* Create a pending interrupt on current vCPU */
+ x2apic_enable();
+ x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
+ APIC_DM_FIXED | IRQ_VECTOR);
+
/*
* Software BP tests.
*
@@ -38,12 +46,19 @@ static void guest_code(void)
"mov %%rax,%0;\n\t write_data:"
: "=m" (guest_value) : : "rax");
- /* Single step test, covers 2 basic instructions and 2 emulated */
+ /*
+ * Single step test, covers 2 basic instructions and 2 emulated
+ *
+ * Enable interrupts during the single stepping to see that
+ * pending interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ
+ */
asm volatile("ss_start: "
+ "sti\n\t"
"xor %%eax,%%eax\n\t"
"cpuid\n\t"
"movl $0x1a0,%%ecx\n\t"
"rdmsr\n\t"
+ "cli\n\t"
: : : "eax", "ebx", "ecx", "edx");
/* DR6.BD test */
@@ -72,11 +87,13 @@ int main(void)
uint64_t cmd;
int i;
/* Instruction lengths starting at ss_start */
- int ss_size[4] = {
+ int ss_size[6] = {
+ 1, /* sti*/
2, /* xor */
2, /* cpuid */
5, /* mov */
2, /* rdmsr */
+ 1, /* cli */
};
if (!kvm_check_cap(KVM_CAP_SET_GUEST_DEBUG)) {
@@ -154,7 +171,8 @@ int main(void)
for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) {
target_rip += ss_size[i];
CLEAR_DEBUG();
- debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
+ KVM_GUESTDBG_BLOCKIRQ;
debug.arch.debugreg[7] = 0x00000400;
APPLY_DEBUG();
vcpu_run(vm, VCPU_ID);
diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
new file mode 100644
index 000000000000..f070ff0224fa
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM_CAP_EXIT_ON_EMULATION_FAILURE capability.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define VCPU_ID 1
+#define PAGE_SIZE 4096
+#define MAXPHYADDR 36
+
+#define MEM_REGION_GVA 0x0000123456789000
+#define MEM_REGION_GPA 0x0000000700000000
+#define MEM_REGION_SLOT 10
+#define MEM_REGION_SIZE PAGE_SIZE
+
+static void guest_code(void)
+{
+ __asm__ __volatile__("flds (%[addr])"
+ :: [addr]"r"(MEM_REGION_GVA));
+
+ GUEST_DONE();
+}
+
+static void run_guest(struct kvm_vm *vm)
+{
+ int rc;
+
+ rc = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
+}
+
+/*
+ * Accessors to get R/M, REG, and Mod bits described in the SDM vol 2,
+ * figure 2-2 "Table Interpretation of ModR/M Byte (C8H)".
+ */
+#define GET_RM(insn_byte) (insn_byte & 0x7)
+#define GET_REG(insn_byte) ((insn_byte & 0x38) >> 3)
+#define GET_MOD(insn_byte) ((insn_byte & 0xc) >> 6)
+
+/* Ensure we are dealing with a simple 2-byte flds instruction. */
+static bool is_flds(uint8_t *insn_bytes, uint8_t insn_size)
+{
+ return insn_size >= 2 &&
+ insn_bytes[0] == 0xd9 &&
+ GET_REG(insn_bytes[1]) == 0x0 &&
+ GET_MOD(insn_bytes[1]) == 0x0 &&
+ /* Ensure there is no SIB byte. */
+ GET_RM(insn_bytes[1]) != 0x4 &&
+ /* Ensure there is no displacement byte. */
+ GET_RM(insn_bytes[1]) != 0x5;
+}
+
+static void process_exit_on_emulation_error(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct kvm_regs regs;
+ uint8_t *insn_bytes;
+ uint8_t insn_size;
+ uint64_t flags;
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+ "Unexpected suberror: %u",
+ run->emulation_failure.suberror);
+
+ if (run->emulation_failure.ndata >= 1) {
+ flags = run->emulation_failure.flags;
+ if ((flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES) &&
+ run->emulation_failure.ndata >= 3) {
+ insn_size = run->emulation_failure.insn_size;
+ insn_bytes = run->emulation_failure.insn_bytes;
+
+ TEST_ASSERT(insn_size <= 15 && insn_size > 0,
+ "Unexpected instruction size: %u",
+ insn_size);
+
+ TEST_ASSERT(is_flds(insn_bytes, insn_size),
+ "Unexpected instruction. Expected 'flds' (0xd9 /0)");
+
+ /*
+ * If is_flds() succeeded then the instruction bytes
+ * contained an flds instruction that is 2-bytes in
+ * length (ie: no prefix, no SIB, no displacement).
+ */
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ regs.rip += 2;
+ vcpu_regs_set(vm, VCPU_ID, &regs);
+ }
+ }
+}
+
+static void do_guest_assert(struct kvm_vm *vm, struct ucall *uc)
+{
+ TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], __FILE__,
+ uc->args[1]);
+}
+
+static void check_for_guest_assert(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ if (run->exit_reason == KVM_EXIT_IO &&
+ get_ucall(vm, VCPU_ID, &uc) == UCALL_ABORT) {
+ do_guest_assert(vm, &uc);
+ }
+}
+
+static void process_ucall_done(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ check_for_guest_assert(vm);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE,
+ "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
+ uc.cmd, UCALL_DONE);
+}
+
+static uint64_t process_ucall(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ break;
+ case UCALL_ABORT:
+ do_guest_assert(vm, &uc);
+ break;
+ case UCALL_DONE:
+ process_ucall_done(vm);
+ break;
+ default:
+ TEST_ASSERT(false, "Unexpected ucall");
+ }
+
+ return uc.cmd;
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_enable_cap emul_failure_cap = {
+ .cap = KVM_CAP_EXIT_ON_EMULATION_FAILURE,
+ .args[0] = 1,
+ };
+ struct kvm_cpuid_entry2 *entry;
+ struct kvm_cpuid2 *cpuid;
+ struct kvm_vm *vm;
+ uint64_t gpa, pte;
+ uint64_t *hva;
+ int rc;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ if (!kvm_check_cap(KVM_CAP_SMALLER_MAXPHYADDR)) {
+ printf("module parameter 'allow_smaller_maxphyaddr' is not set. Skipping test.\n");
+ return 0;
+ }
+
+ cpuid = kvm_get_supported_cpuid();
+
+ entry = kvm_get_supported_cpuid_index(0x80000008, 0);
+ entry->eax = (entry->eax & 0xffffff00) | MAXPHYADDR;
+ set_cpuid(cpuid, entry);
+
+ vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+
+ rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
+ TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
+ vm_enable_cap(vm, &emul_failure_cap);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ MEM_REGION_GPA, MEM_REGION_SLOT,
+ MEM_REGION_SIZE / PAGE_SIZE, 0);
+ gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
+ MEM_REGION_GPA, MEM_REGION_SLOT);
+ TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
+ virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
+ hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+ memset(hva, 0, PAGE_SIZE);
+ pte = vm_get_page_table_entry(vm, VCPU_ID, MEM_REGION_GVA);
+ vm_set_page_table_entry(vm, VCPU_ID, MEM_REGION_GVA, pte | (1ull << 36));
+
+ run_guest(vm);
+ process_exit_on_emulation_error(vm);
+ run_guest(vm);
+
+ TEST_ASSERT(process_ucall(vm) == UCALL_DONE, "Expected UCALL_DONE");
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 63096cea26c6..2b46dcca86a8 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -22,15 +22,6 @@
static int ud_count;
-void enable_x2apic(void)
-{
- uint32_t spiv_reg = APIC_BASE_MSR + (APIC_SPIV >> 4);
-
- wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
- MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
- wrmsr(spiv_reg, rdmsr(spiv_reg) | APIC_SPIV_APIC_ENABLED);
-}
-
static void guest_ud_handler(struct ex_regs *regs)
{
ud_count++;
@@ -59,7 +50,7 @@ void guest_code(struct vmx_pages *vmx_pages)
#define L2_GUEST_STACK_SIZE 64
unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
- enable_x2apic();
+ x2apic_enable();
GUEST_SYNC(1);
GUEST_SYNC(2);
@@ -121,14 +112,38 @@ void inject_nmi(struct kvm_vm *vm)
vcpu_events_set(vm, VCPU_ID, &events);
}
+static void save_restore_vm(struct kvm_vm *vm)
+{
+ struct kvm_regs regs1, regs2;
+ struct kvm_x86_state *state;
+
+ state = vcpu_save_state(vm, VCPU_ID);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ kvm_vm_restart(vm, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID);
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
+ vcpu_enable_evmcs(vm, VCPU_ID);
+ vcpu_load_state(vm, VCPU_ID, state);
+ free(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vm, VCPU_ID, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+}
+
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva = 0;
- struct kvm_regs regs1, regs2;
struct kvm_vm *vm;
struct kvm_run *run;
- struct kvm_x86_state *state;
struct ucall uc;
int stage;
@@ -145,21 +160,18 @@ int main(int argc, char *argv[])
vcpu_set_hv_cpuid(vm, VCPU_ID);
vcpu_enable_evmcs(vm, VCPU_ID);
- run = vcpu_state(vm, VCPU_ID);
-
- vcpu_regs_get(vm, VCPU_ID, &regs1);
-
vcpu_alloc_vmx(vm, &vmx_pages_gva);
vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
- vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler);
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+ vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
pr_info("Running L1 which uses EVMCS to run L2\n");
for (stage = 1;; stage++) {
+ run = vcpu_state(vm, VCPU_ID);
_vcpu_run(vm, VCPU_ID);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Stage %d: unexpected exit reason: %u (%s),\n",
@@ -184,32 +196,23 @@ int main(int argc, char *argv[])
uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
stage, (ulong)uc.args[1]);
- state = vcpu_save_state(vm, VCPU_ID);
- memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
-
- kvm_vm_release(vm);
-
- /* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_hv_cpuid(vm, VCPU_ID);
- vcpu_enable_evmcs(vm, VCPU_ID);
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
- free(state);
-
- memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
- TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
- "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
- (ulong) regs2.rdi, (ulong) regs2.rsi);
+ save_restore_vm(vm);
/* Force immediate L2->L1 exit before resuming */
if (stage == 8) {
pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
inject_nmi(vm);
}
+
+ /*
+ * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
+ * restored VM (before the first KVM_RUN) to check that
+ * KVM_STATE_NESTED_EVMCS is not lost.
+ */
+ if (stage == 9) {
+ pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
+ save_restore_vm(vm);
+ }
}
done:
diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
index 8c77537af5a1..a711f83749ea 100644
--- a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c
@@ -145,8 +145,7 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage)
struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
{
int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
- vm_vaddr_t gva = vm_vaddr_alloc(vm, size,
- getpagesize(), 0, 0);
+ vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
memcpy(guest_cpuids, cpuid, size);
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
index 7f1d2765572c..e0b2bb1339b1 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -7,6 +7,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "hyperv.h"
struct ms_hyperv_tsc_page {
volatile u32 tsc_sequence;
@@ -15,13 +16,6 @@ struct ms_hyperv_tsc_page {
volatile s64 tsc_offset;
} __packed;
-#define HV_X64_MSR_GUEST_OS_ID 0x40000000
-#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
-#define HV_X64_MSR_REFERENCE_TSC 0x40000021
-#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
-#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
-#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
-
/* Simplified mul_u64_u64_shr() */
static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
{
@@ -220,8 +214,8 @@ int main(void)
vcpu_set_hv_cpuid(vm, VCPU_ID);
- tsc_page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
- memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize());
+ tsc_page_gva = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
"TSC page has to be page aligned\n");
vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
new file mode 100644
index 000000000000..91d88aaa9899
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -0,0 +1,684 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V features enablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+#define VCPU_ID 0
+#define LINUX_OS_ID ((u64)0x8100 << 48)
+
+extern unsigned char rdmsr_start;
+extern unsigned char rdmsr_end;
+
+static u64 do_rdmsr(u32 idx)
+{
+ u32 lo, hi;
+
+ asm volatile("rdmsr_start: rdmsr;"
+ "rdmsr_end:"
+ : "=a"(lo), "=c"(hi)
+ : "c"(idx));
+
+ return (((u64) hi) << 32) | lo;
+}
+
+extern unsigned char wrmsr_start;
+extern unsigned char wrmsr_end;
+
+static void do_wrmsr(u32 idx, u64 val)
+{
+ u32 lo, hi;
+
+ lo = val;
+ hi = val >> 32;
+
+ asm volatile("wrmsr_start: wrmsr;"
+ "wrmsr_end:"
+ : : "a"(lo), "c"(idx), "d"(hi));
+}
+
+static int nr_gp;
+static int nr_ud;
+
+static inline u64 hypercall(u64 control, vm_vaddr_t input_address,
+ vm_vaddr_t output_address)
+{
+ u64 hv_status;
+
+ asm volatile("mov %3, %%r8\n"
+ "vmcall"
+ : "=a" (hv_status),
+ "+c" (control), "+d" (input_address)
+ : "r" (output_address)
+ : "cc", "memory", "r8", "r9", "r10", "r11");
+
+ return hv_status;
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ unsigned char *rip = (unsigned char *)regs->rip;
+ bool r, w;
+
+ r = rip == &rdmsr_start;
+ w = rip == &wrmsr_start;
+ GUEST_ASSERT(r || w);
+
+ nr_gp++;
+
+ if (r)
+ regs->rip = (uint64_t)&rdmsr_end;
+ else
+ regs->rip = (uint64_t)&wrmsr_end;
+}
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ nr_ud++;
+ regs->rip += 3;
+}
+
+struct msr_data {
+ uint32_t idx;
+ bool available;
+ bool write;
+ u64 write_val;
+};
+
+struct hcall_data {
+ uint64_t control;
+ uint64_t expect;
+ bool ud_expected;
+};
+
+static void guest_msr(struct msr_data *msr)
+{
+ int i = 0;
+
+ while (msr->idx) {
+ WRITE_ONCE(nr_gp, 0);
+ if (!msr->write)
+ do_rdmsr(msr->idx);
+ else
+ do_wrmsr(msr->idx, msr->write_val);
+
+ if (msr->available)
+ GUEST_ASSERT(READ_ONCE(nr_gp) == 0);
+ else
+ GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
+
+ GUEST_SYNC(i++);
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
+{
+ int i = 0;
+ u64 res, input, output;
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+
+ while (hcall->control) {
+ nr_ud = 0;
+ if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
+ input = pgs_gpa;
+ output = pgs_gpa + 4096;
+ } else {
+ input = output = 0;
+ }
+
+ res = hypercall(hcall->control, input, output);
+ if (hcall->ud_expected)
+ GUEST_ASSERT(nr_ud == 1);
+ else
+ GUEST_ASSERT(res == hcall->expect);
+
+ GUEST_SYNC(i++);
+ }
+
+ GUEST_DONE();
+}
+
+static void hv_set_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 *feat,
+ struct kvm_cpuid_entry2 *recomm,
+ struct kvm_cpuid_entry2 *dbg)
+{
+ TEST_ASSERT(set_cpuid(cpuid, feat),
+ "failed to set KVM_CPUID_FEATURES leaf");
+ TEST_ASSERT(set_cpuid(cpuid, recomm),
+ "failed to set HYPERV_CPUID_ENLIGHTMENT_INFO leaf");
+ TEST_ASSERT(set_cpuid(cpuid, dbg),
+ "failed to set HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES leaf");
+ vcpu_set_cpuid(vm, VCPU_ID, cpuid);
+}
+
+static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr,
+ struct kvm_cpuid2 *best)
+{
+ struct kvm_run *run;
+ struct ucall uc;
+ int stage = 0, r;
+ struct kvm_cpuid_entry2 feat = {
+ .function = HYPERV_CPUID_FEATURES
+ };
+ struct kvm_cpuid_entry2 recomm = {
+ .function = HYPERV_CPUID_ENLIGHTMENT_INFO
+ };
+ struct kvm_cpuid_entry2 dbg = {
+ .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
+ };
+ struct kvm_enable_cap cap = {0};
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ while (true) {
+ switch (stage) {
+ case 0:
+ /*
+ * Only available when Hyper-V identification is set
+ */
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 1:
+ msr->idx = HV_X64_MSR_HYPERCALL;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 2:
+ feat.eax |= HV_MSR_HYPERCALL_AVAILABLE;
+ /*
+ * HV_X64_MSR_GUEST_OS_ID has to be written first to make
+ * HV_X64_MSR_HYPERCALL available.
+ */
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = 1;
+ msr->write_val = LINUX_OS_ID;
+ msr->available = 1;
+ break;
+ case 3:
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 4:
+ msr->idx = HV_X64_MSR_HYPERCALL;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+
+ case 5:
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 6:
+ feat.eax |= HV_MSR_VP_RUNTIME_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 7:
+ /* Read only */
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 0;
+ break;
+
+ case 8:
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 9:
+ feat.eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 10:
+ /* Read only */
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 0;
+ break;
+
+ case 11:
+ msr->idx = HV_X64_MSR_VP_INDEX;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 12:
+ feat.eax |= HV_MSR_VP_INDEX_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 13:
+ /* Read only */
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 0;
+ break;
+
+ case 14:
+ msr->idx = HV_X64_MSR_RESET;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 15:
+ feat.eax |= HV_MSR_RESET_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 16:
+ msr->write = 1;
+ msr->write_val = 0;
+ msr->available = 1;
+ break;
+
+ case 17:
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 18:
+ feat.eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 19:
+ msr->write = 1;
+ msr->write_val = 0;
+ msr->available = 1;
+ break;
+
+ case 20:
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 21:
+ /*
+ * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2
+ * capability enabled and guest visible CPUID bit unset.
+ */
+ cap.cap = KVM_CAP_HYPERV_SYNIC2;
+ vcpu_enable_cap(vm, VCPU_ID, &cap);
+ break;
+ case 22:
+ feat.eax |= HV_MSR_SYNIC_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 23:
+ msr->write = 1;
+ msr->write_val = 0;
+ msr->available = 1;
+ break;
+
+ case 24:
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 25:
+ feat.eax |= HV_MSR_SYNTIMER_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 26:
+ msr->write = 1;
+ msr->write_val = 0;
+ msr->available = 1;
+ break;
+ case 27:
+ /* Direct mode test */
+ msr->write = 1;
+ msr->write_val = 1 << 12;
+ msr->available = 0;
+ break;
+ case 28:
+ feat.edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
+ msr->available = 1;
+ break;
+
+ case 29:
+ msr->idx = HV_X64_MSR_EOI;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 30:
+ feat.eax |= HV_MSR_APIC_ACCESS_AVAILABLE;
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 1;
+ break;
+
+ case 31:
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 32:
+ feat.eax |= HV_ACCESS_FREQUENCY_MSRS;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 33:
+ /* Read only */
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 0;
+ break;
+
+ case 34:
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 35:
+ feat.eax |= HV_ACCESS_REENLIGHTENMENT;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 36:
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 1;
+ break;
+ case 37:
+ /* Can only write '0' */
+ msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS;
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 0;
+ break;
+
+ case 38:
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 39:
+ feat.edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 40:
+ msr->write = 1;
+ msr->write_val = 1;
+ msr->available = 1;
+ break;
+
+ case 41:
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = 0;
+ msr->available = 0;
+ break;
+ case 42:
+ feat.edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
+ dbg.eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+ msr->write = 0;
+ msr->available = 1;
+ break;
+ case 43:
+ msr->write = 1;
+ msr->write_val = 0;
+ msr->available = 1;
+ break;
+
+ case 44:
+ /* END */
+ msr->idx = 0;
+ break;
+ }
+
+ hv_set_cpuid(vm, best, &feat, &recomm, &dbg);
+
+ if (msr->idx)
+ pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
+ msr->idx, msr->write ? "write" : "read");
+ else
+ pr_debug("Stage %d: finish\n", stage);
+
+ r = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Unexpected stage: %ld (%d expected)\n",
+ uc.args[1], stage);
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ return;
+ case UCALL_DONE:
+ return;
+ }
+
+ stage++;
+ }
+}
+
+static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall,
+ void *input, void *output, struct kvm_cpuid2 *best)
+{
+ struct kvm_run *run;
+ struct ucall uc;
+ int stage = 0, r;
+ struct kvm_cpuid_entry2 feat = {
+ .function = HYPERV_CPUID_FEATURES,
+ .eax = HV_MSR_HYPERCALL_AVAILABLE
+ };
+ struct kvm_cpuid_entry2 recomm = {
+ .function = HYPERV_CPUID_ENLIGHTMENT_INFO
+ };
+ struct kvm_cpuid_entry2 dbg = {
+ .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
+ };
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ while (true) {
+ switch (stage) {
+ case 0:
+ hcall->control = 0xdeadbeef;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+
+ case 1:
+ hcall->control = HVCALL_POST_MESSAGE;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 2:
+ feat.ebx |= HV_POST_MESSAGES;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+
+ case 3:
+ hcall->control = HVCALL_SIGNAL_EVENT;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 4:
+ feat.ebx |= HV_SIGNAL_EVENTS;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+
+ case 5:
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+ case 6:
+ dbg.eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 7:
+ feat.ebx |= HV_DEBUGGING;
+ hcall->expect = HV_STATUS_OPERATION_DENIED;
+ break;
+
+ case 8:
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 9:
+ recomm.eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 10:
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 11:
+ recomm.eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 12:
+ hcall->control = HVCALL_SEND_IPI;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 13:
+ recomm.eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ case 14:
+ /* Nothing in 'sparse banks' -> success */
+ hcall->control = HVCALL_SEND_IPI_EX;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 15:
+ hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 16:
+ recomm.ebx = 0xfff;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 17:
+ /* XMM fast hypercall */
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+ hcall->ud_expected = true;
+ break;
+ case 18:
+ feat.edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
+ hcall->ud_expected = false;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 19:
+ /* END */
+ hcall->control = 0;
+ break;
+ }
+
+ hv_set_cpuid(vm, best, &feat, &recomm, &dbg);
+
+ if (hcall->control)
+ pr_debug("Stage %d: testing hcall: 0x%lx\n", stage,
+ hcall->control);
+ else
+ pr_debug("Stage %d: finish\n", stage);
+
+ r = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Unexpected stage: %ld (%d expected)\n",
+ uc.args[1], stage);
+ break;
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ return;
+ case UCALL_DONE:
+ return;
+ }
+
+ stage++;
+ }
+}
+
+int main(void)
+{
+ struct kvm_cpuid2 *best;
+ struct kvm_vm *vm;
+ vm_vaddr_t msr_gva, hcall_page, hcall_params;
+ struct kvm_enable_cap cap = {
+ .cap = KVM_CAP_HYPERV_ENFORCE_CPUID,
+ .args = {1}
+ };
+
+ /* Test MSRs */
+ vm = vm_create_default(VCPU_ID, 0, guest_msr);
+
+ msr_gva = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
+ vcpu_args_set(vm, VCPU_ID, 1, msr_gva);
+ vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+ best = kvm_get_supported_hv_cpuid();
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+ pr_info("Testing access to Hyper-V specific MSRs\n");
+ guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva),
+ best);
+ kvm_vm_free(vm);
+
+ /* Test hypercalls */
+ vm = vm_create_default(VCPU_ID, 0, guest_hcall);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
+ /* Hypercall input/output */
+ hcall_page = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+ hcall_params = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+
+ vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+ vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+ best = kvm_get_supported_hv_cpuid();
+
+ pr_info("Testing access to Hyper-V hypercalls\n");
+ guest_test_hcalls_access(vm, addr_gva2hva(vm, hcall_params),
+ addr_gva2hva(vm, hcall_page),
+ addr_gva2hva(vm, hcall_page) + getpagesize(),
+ best);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
index 732b244d6956..04ed975662c9 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -227,7 +227,7 @@ int main(void)
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
enter_guest(vm);
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
index e6480fd5c4bd..8039e1eff938 100644
--- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
+++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
@@ -82,7 +82,8 @@ int get_warnings_count(void)
FILE *f;
f = popen("dmesg | grep \"WARNING:\" | wc -l", "r");
- fscanf(f, "%d", &warnings);
+ if (fscanf(f, "%d", &warnings) < 1)
+ warnings = 0;
fclose(f);
return warnings;
diff --git a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
new file mode 100644
index 000000000000..da2325fcad87
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 1
+
+#define MMIO_GPA 0x100000000ull
+
+static void guest_code(void)
+{
+ (void)READ_ONCE(*((uint64_t *)MMIO_GPA));
+ (void)READ_ONCE(*((uint64_t *)MMIO_GPA));
+
+ GUEST_ASSERT(0);
+}
+
+static void guest_pf_handler(struct ex_regs *regs)
+{
+ /* PFEC == RSVD | PRESENT (read, kernel). */
+ GUEST_ASSERT(regs->error_code == 0x9);
+ GUEST_DONE();
+}
+
+static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val)
+{
+ u32 good_cpuid_val = *cpuid_reg;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ uint64_t cmd;
+ int r;
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ run = vcpu_state(vm, VCPU_ID);
+
+ /* Map 1gb page without a backing memlot. */
+ __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, X86_PAGE_SIZE_1G);
+
+ r = _vcpu_run(vm, VCPU_ID);
+
+ /* Guest access to the 1gb page should trigger MMIO. */
+ TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_MMIO,
+ "Unexpected exit reason: %u (%s), expected MMIO exit (1gb page w/o memslot)\n",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ TEST_ASSERT(run->mmio.len == 8, "Unexpected exit mmio size = %u", run->mmio.len);
+
+ TEST_ASSERT(run->mmio.phys_addr == MMIO_GPA,
+ "Unexpected exit mmio address = 0x%llx", run->mmio.phys_addr);
+
+ /*
+ * Effect the CPUID change for the guest and re-enter the guest. Its
+ * access should now #PF due to the PAGE_SIZE bit being reserved or
+ * the resulting GPA being invalid. Note, kvm_get_supported_cpuid()
+ * returns the struct that contains the entry being modified. Eww.
+ */
+ *cpuid_reg = evil_cpuid_val;
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ /*
+ * Add a dummy memslot to coerce KVM into bumping the MMIO generation.
+ * KVM does not "officially" support mucking with CPUID after KVM_RUN,
+ * and will incorrectly reuse MMIO SPTEs. Don't delete the memslot!
+ * KVM x86 zaps all shadow pages on memslot deletion.
+ */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ MMIO_GPA << 1, 10, 1, 0);
+
+ /* Set up a #PF handler to eat the RSVD #PF and signal all done! */
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vm_install_exception_handler(vm, PF_VECTOR, guest_pf_handler);
+
+ r = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r);
+
+ cmd = get_ucall(vm, VCPU_ID, NULL);
+ TEST_ASSERT(cmd == UCALL_DONE,
+ "Unexpected guest exit, exit_reason=%s, ucall.cmd = %lu\n",
+ exit_reason_str(run->exit_reason), cmd);
+
+ /*
+ * Restore the happy CPUID value for the next test. Yes, changes are
+ * indeed persistent across VM destruction.
+ */
+ *cpuid_reg = good_cpuid_val;
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_cpuid_entry2 *entry;
+ int opt;
+
+ /*
+ * All tests are opt-in because TDP doesn't play nice with reserved #PF
+ * in the GVA->GPA translation. The hardware page walker doesn't let
+ * software change GBPAGES or MAXPHYADDR, and KVM doesn't manually walk
+ * the GVA on fault for performance reasons.
+ */
+ bool do_gbpages = false;
+ bool do_maxphyaddr = false;
+
+ setbuf(stdout, NULL);
+
+ while ((opt = getopt(argc, argv, "gm")) != -1) {
+ switch (opt) {
+ case 'g':
+ do_gbpages = true;
+ break;
+ case 'm':
+ do_maxphyaddr = true;
+ break;
+ case 'h':
+ default:
+ printf("usage: %s [-g (GBPAGES)] [-m (MAXPHYADDR)]\n", argv[0]);
+ break;
+ }
+ }
+
+ if (!do_gbpages && !do_maxphyaddr) {
+ print_skip("No sub-tests selected");
+ return 0;
+ }
+
+ entry = kvm_get_supported_cpuid_entry(0x80000001);
+ if (!(entry->edx & CPUID_GBPAGES)) {
+ print_skip("1gb hugepages not supported");
+ return 0;
+ }
+
+ if (do_gbpages) {
+ pr_info("Test MMIO after toggling CPUID.GBPAGES\n\n");
+ mmu_role_test(&entry->edx, entry->edx & ~CPUID_GBPAGES);
+ }
+
+ if (do_maxphyaddr) {
+ pr_info("Test MMIO after changing CPUID.MAXPHYADDR\n\n");
+ entry = kvm_get_supported_cpuid_entry(0x80000008);
+ mmu_role_test(&entry->eax, (entry->eax & ~0xff) | 0x20);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
index 12c558fc8074..ae76436af0cc 100644
--- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
+++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
@@ -14,16 +14,12 @@
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "apic.h"
#define N_VCPU 2
#define VCPU_ID0 0
#define VCPU_ID1 1
-static uint32_t get_bsp_flag(void)
-{
- return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
-}
-
static void guest_bsp_vcpu(void *arg)
{
GUEST_SYNC(1);
@@ -94,7 +90,7 @@ static struct kvm_vm *create_vm(void)
pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages);
vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ kvm_vm_elf_load(vm, program_invocation_name);
vm_create_irqchip(vm);
return vm;
@@ -106,8 +102,6 @@ static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code)
vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu);
else
vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu);
-
- vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
}
static void run_vm_bsp(uint32_t bsp_vcpu)
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index 613c42c5a9b8..d0fe2fdce58c 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -53,15 +53,28 @@ static inline void sync_with_host(uint64_t phase)
: "+a" (phase));
}
-void self_smi(void)
+static void self_smi(void)
{
- wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4),
- APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+ x2apic_write_reg(APIC_ICR,
+ APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
}
-void guest_code(void *arg)
+static void l2_guest_code(void)
{
+ sync_with_host(8);
+
+ sync_with_host(10);
+
+ vmcall();
+}
+
+static void guest_code(void *arg)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+ struct svm_test_data *svm = arg;
+ struct vmx_pages *vmx_pages = arg;
sync_with_host(1);
@@ -74,21 +87,50 @@ void guest_code(void *arg)
sync_with_host(4);
if (arg) {
- if (cpu_has_svm())
- generic_svm_setup(arg, NULL, NULL);
- else
- GUEST_ASSERT(prepare_for_vmx_operation(arg));
+ if (cpu_has_svm()) {
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ } else {
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ }
sync_with_host(5);
self_smi();
sync_with_host(7);
+
+ if (cpu_has_svm()) {
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ svm->vmcb->save.rip += 3;
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ } else {
+ vmlaunch();
+ vmresume();
+ }
+
+ /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */
+ sync_with_host(12);
}
sync_with_host(DONE);
}
+void inject_smi(struct kvm_vm *vm)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vm, VCPU_ID, &events);
+
+ events.smi.pending = 1;
+ events.flags |= KVM_VCPUEVENT_VALID_SMM;
+
+ vcpu_events_set(vm, VCPU_ID, &events);
+}
+
int main(int argc, char *argv[])
{
vm_vaddr_t nested_gva = 0;
@@ -147,6 +189,22 @@ int main(int argc, char *argv[])
"Unexpected stage: #%x, got %x",
stage, stage_reported);
+ /*
+ * Enter SMM during L2 execution and check that we correctly
+ * return from it. Do not perform save/restore while in SMM yet.
+ */
+ if (stage == 8) {
+ inject_smi(vm);
+ continue;
+ }
+
+ /*
+ * Perform save/restore while the guest is in SMM triggered
+ * during L2 execution.
+ */
+ if (stage == 10)
+ inject_smi(vm);
+
state = vcpu_save_state(vm, VCPU_ID);
kvm_vm_release(vm);
kvm_vm_restart(vm, O_RDWR);
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
new file mode 100644
index 000000000000..df04f56ce859
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "apic.h"
+
+#define VCPU_ID 0
+
+static struct kvm_vm *vm;
+
+bool vintr_irq_called;
+bool intr_irq_called;
+
+#define VINTR_IRQ_NUMBER 0x20
+#define INTR_IRQ_NUMBER 0x30
+
+static void vintr_irq_handler(struct ex_regs *regs)
+{
+ vintr_irq_called = true;
+}
+
+static void intr_irq_handler(struct ex_regs *regs)
+{
+ x2apic_write_reg(APIC_EOI, 0x00);
+ intr_irq_called = true;
+}
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ /* This code raises interrupt INTR_IRQ_NUMBER in the L1's LAPIC,
+ * and since L1 didn't enable virtual interrupt masking,
+ * L2 should receive it and not L1.
+ *
+ * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ
+ * so it should also receive it after the following 'sti'.
+ */
+ x2apic_write_reg(APIC_ICR,
+ APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
+
+ __asm__ __volatile__(
+ "sti\n"
+ "nop\n"
+ );
+
+ GUEST_ASSERT(vintr_irq_called);
+ GUEST_ASSERT(intr_irq_called);
+
+ __asm__ __volatile__(
+ "vmcall\n"
+ );
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ x2apic_enable();
+
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* No virtual interrupt masking */
+ vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+ /* No intercepts for real and virtual interrupts */
+ vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR | INTERCEPT_VINTR);
+
+ /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
+ vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
+ vmcb->control.int_vector = VINTR_IRQ_NUMBER;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t svm_gva;
+
+ nested_svm_check_supported();
+
+ vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+ vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
+ vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
+
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s", (const char *)uc.args[0]);
+ break;
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index d672f0a473f8..fc03a150278d 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -24,6 +24,10 @@
#define UCALL_PIO_PORT ((uint16_t)0x1000)
+struct ucall uc_none = {
+ .cmd = UCALL_NONE,
+};
+
/*
* ucall is embedded here to protect against compiler reshuffling registers
* before calling a function. In this test we only need to get KVM_EXIT_IO
@@ -34,7 +38,8 @@ void guest_code(void)
asm volatile("1: in %[port], %%al\n"
"add $0x1, %%rbx\n"
"jmp 1b"
- : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx");
+ : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none)
+ : "rax", "rbx");
}
static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
index e357d8e222d4..5a6a662f2e59 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
@@ -18,15 +18,6 @@
#define rounded_rdmsr(x) ROUND(rdmsr(x))
#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vm, 0, x))
-#define GUEST_ASSERT_EQ(a, b) do { \
- __typeof(a) _a = (a); \
- __typeof(b) _b = (b); \
- if (_a != _b) \
- ucall(UCALL_ABORT, 4, \
- "Failed guest assert: " \
- #a " == " #b, __LINE__, _a, _b); \
- } while(0)
-
static void guest_code(void)
{
u64 val = 0;
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
index 72c0d0797522..e3e20e8848d0 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -574,7 +574,7 @@ static void test_msr_filter_allow(void) {
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, VCPU_ID);
- vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
/* Process guest code userspace exits. */
run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
@@ -588,12 +588,12 @@ static void test_msr_filter_allow(void) {
run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT);
run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT);
- vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
run_guest(vm);
- vm_handle_exception(vm, UD_VECTOR, NULL);
+ vm_install_exception_handler(vm, UD_VECTOR, NULL);
if (process_ucall(vm) != UCALL_DONE) {
- vm_handle_exception(vm, GP_VECTOR, guest_fep_gp_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
/* Process emulated rdmsr and wrmsr instructions. */
run_guest_then_process_rdmsr(vm, MSR_IA32_XSS);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
index d14888b34adb..d438c4d3228a 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
@@ -96,7 +96,7 @@ int main(int argc, char *argv[])
}
vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
- prepare_virtualize_apic_accesses(vmx, vm, 0);
+ prepare_virtualize_apic_accesses(vmx, vm);
vcpu_args_set(vm, VCPU_ID, 2, vmx_pages_gva, high_gpa);
while (!done) {
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index 537de1068554..68f26a8b4f42 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -97,7 +97,7 @@ int main(int argc, char *argv[])
* Add an identity map for GVA range [0xc0000000, 0xc0002000). This
* affects both L1 and L2. However...
*/
- virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES, 0);
+ virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
/*
* ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
@@ -107,11 +107,11 @@ int main(int argc, char *argv[])
* meaning after the last call to virt_map.
*/
prepare_eptp(vmx, vm, 0);
- nested_map_memslot(vmx, vm, 0, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0);
+ nested_map_memslot(vmx, vm, 0);
+ nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
+ nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
- bmap = bitmap_alloc(TEST_MEM_PAGES);
+ bmap = bitmap_zalloc(TEST_MEM_PAGES);
host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
while (!done) {
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
new file mode 100644
index 000000000000..280c01fd2412
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_nested_tsc_scaling_test
+ *
+ * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This test case verifies that nested TSC scaling behaves as expected when
+ * both L1 and L2 are scaled using different ratios. For this test we scale
+ * L1 down and scale L2 up.
+ */
+
+#include <time.h>
+
+#include "kvm_util.h"
+#include "vmx.h"
+#include "kselftest.h"
+
+
+#define VCPU_ID 0
+
+/* L2 is scaled up (from L1's perspective) by this factor */
+#define L2_SCALE_FACTOR 4ULL
+
+#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
+#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
+
+#define L2_GUEST_STACK_SIZE 64
+
+enum { USLEEP, UCHECK_L1, UCHECK_L2 };
+#define GUEST_SLEEP(sec) ucall(UCALL_SYNC, 2, USLEEP, sec)
+#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
+
+
+/*
+ * This function checks whether the "actual" TSC frequency of a guest matches
+ * its expected frequency. In order to account for delays in taking the TSC
+ * measurements, a difference of 1% between the actual and the expected value
+ * is tolerated.
+ */
+static void compare_tsc_freq(uint64_t actual, uint64_t expected)
+{
+ uint64_t tolerance, thresh_low, thresh_high;
+
+ tolerance = expected / 100;
+ thresh_low = expected - tolerance;
+ thresh_high = expected + tolerance;
+
+ TEST_ASSERT(thresh_low < actual,
+ "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+ " but it actually is %"PRIu64,
+ thresh_low, thresh_high, actual);
+ TEST_ASSERT(thresh_high > actual,
+ "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+ " but it actually is %"PRIu64,
+ thresh_low, thresh_high, actual);
+}
+
+static void check_tsc_freq(int level)
+{
+ uint64_t tsc_start, tsc_end, tsc_freq;
+
+ /*
+ * Reading the TSC twice with about a second's difference should give
+ * us an approximation of the TSC frequency from the guest's
+ * perspective. Now, this won't be completely accurate, but it should
+ * be good enough for the purposes of this test.
+ */
+ tsc_start = rdmsr(MSR_IA32_TSC);
+ GUEST_SLEEP(1);
+ tsc_end = rdmsr(MSR_IA32_TSC);
+
+ tsc_freq = tsc_end - tsc_start;
+
+ GUEST_CHECK(level, tsc_freq);
+}
+
+static void l2_guest_code(void)
+{
+ check_tsc_freq(UCHECK_L2);
+
+ /* exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+
+ /* check that L1's frequency looks alright before launching L2 */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* prepare the VMCS for L2 execution */
+ prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* enable TSC offsetting and TSC scaling for L2 */
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+ control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+ control |= SECONDARY_EXEC_TSC_SCALING;
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+
+ vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
+ vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
+ vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
+
+ /* launch L2 */
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ /* check that L1's frequency still looks good */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_DONE();
+}
+
+static void tsc_scaling_check_supported(void)
+{
+ if (!kvm_check_cap(KVM_CAP_TSC_CONTROL)) {
+ print_skip("TSC scaling not supported by the HW");
+ exit(KSFT_SKIP);
+ }
+}
+
+static void stable_tsc_check_supported(void)
+{
+ FILE *fp;
+ char buf[4];
+
+ fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r");
+ if (fp == NULL)
+ goto skip_test;
+
+ if (fgets(buf, sizeof(buf), fp) == NULL)
+ goto skip_test;
+
+ if (strncmp(buf, "tsc", sizeof(buf)))
+ goto skip_test;
+
+ return;
+skip_test:
+ print_skip("Kernel does not use TSC clocksource - assuming that host TSC is not stable");
+ exit(KSFT_SKIP);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ vm_vaddr_t vmx_pages_gva;
+
+ uint64_t tsc_start, tsc_end;
+ uint64_t tsc_khz;
+ uint64_t l1_scale_factor;
+ uint64_t l0_tsc_freq = 0;
+ uint64_t l1_tsc_freq = 0;
+ uint64_t l2_tsc_freq = 0;
+
+ nested_vmx_check_supported();
+ tsc_scaling_check_supported();
+ stable_tsc_check_supported();
+
+ /*
+ * We set L1's scale factor to be a random number from 2 to 10.
+ * Ideally we would do the same for L2's factor but that one is
+ * referenced by both main() and l1_guest_code() and using a global
+ * variable does not work.
+ */
+ srand(time(NULL));
+ l1_scale_factor = (rand() % 9) + 2;
+ printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
+ printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
+
+ tsc_start = rdtsc();
+ sleep(1);
+ tsc_end = rdtsc();
+
+ l0_tsc_freq = tsc_end - tsc_start;
+ printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
+
+ vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+
+ tsc_khz = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_TSC_KHZ, NULL);
+ TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
+
+ /* scale down L1's TSC frequency */
+ vcpu_ioctl(vm, VCPU_ID, KVM_SET_TSC_KHZ,
+ (void *) (tsc_khz / l1_scale_factor));
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s", (const char *) uc.args[0]);
+ case UCALL_SYNC:
+ switch (uc.args[0]) {
+ case USLEEP:
+ sleep(uc.args[1]);
+ break;
+ case UCHECK_L1:
+ l1_tsc_freq = uc.args[1];
+ printf("L1's TSC frequency is around: %"PRIu64
+ "\n", l1_tsc_freq);
+
+ compare_tsc_freq(l1_tsc_freq,
+ l0_tsc_freq / l1_scale_factor);
+ break;
+ case UCHECK_L2:
+ l2_tsc_freq = uc.args[1];
+ printf("L2's TSC frequency is around: %"PRIu64
+ "\n", l2_tsc_freq);
+
+ compare_tsc_freq(l2_tsc_freq,
+ l1_tsc_freq * L2_SCALE_FACTOR);
+ break;
+ }
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
index 2f964cdc273c..afbbc40df884 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -42,8 +42,6 @@
#define HALTER_VCPU_ID 0
#define SENDER_VCPU_ID 1
-volatile uint32_t *apic_base = (volatile uint32_t *)APIC_DEFAULT_GPA;
-
/*
* Vector for IPI from sender vCPU to halting vCPU.
* Value is arbitrary and was chosen for the alternating bit pattern. Any
@@ -86,45 +84,6 @@ struct thread_params {
uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
};
-uint32_t read_apic_reg(uint reg)
-{
- return apic_base[reg >> 2];
-}
-
-void write_apic_reg(uint reg, uint32_t val)
-{
- apic_base[reg >> 2] = val;
-}
-
-void disable_apic(void)
-{
- wrmsr(MSR_IA32_APICBASE,
- rdmsr(MSR_IA32_APICBASE) &
- ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
-}
-
-void enable_xapic(void)
-{
- uint64_t val = rdmsr(MSR_IA32_APICBASE);
-
- /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
- if (val & MSR_IA32_APICBASE_EXTD) {
- disable_apic();
- wrmsr(MSR_IA32_APICBASE,
- rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
- } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
- wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
- }
-
- /*
- * Per SDM: reset value of spurious interrupt vector register has the
- * APIC software enabled bit=0. It must be enabled in addition to the
- * enable bit in the MSR.
- */
- val = read_apic_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
- write_apic_reg(APIC_SPIV, val);
-}
-
void verify_apic_base_addr(void)
{
uint64_t msr = rdmsr(MSR_IA32_APICBASE);
@@ -136,10 +95,10 @@ void verify_apic_base_addr(void)
static void halter_guest_code(struct test_data_page *data)
{
verify_apic_base_addr();
- enable_xapic();
+ xapic_enable();
- data->halter_apic_id = GET_APIC_ID_FIELD(read_apic_reg(APIC_ID));
- data->halter_lvr = read_apic_reg(APIC_LVR);
+ data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
+ data->halter_lvr = xapic_read_reg(APIC_LVR);
/*
* Loop forever HLTing and recording halts & wakes. Disable interrupts
@@ -150,8 +109,8 @@ static void halter_guest_code(struct test_data_page *data)
* TPR and PPR for diagnostic purposes in case the test fails.
*/
for (;;) {
- data->halter_tpr = read_apic_reg(APIC_TASKPRI);
- data->halter_ppr = read_apic_reg(APIC_PROCPRI);
+ data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
+ data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
data->hlt_count++;
asm volatile("sti; hlt; cli");
data->wake_count++;
@@ -166,7 +125,7 @@ static void halter_guest_code(struct test_data_page *data)
static void guest_ipi_handler(struct ex_regs *regs)
{
ipis_rcvd++;
- write_apic_reg(APIC_EOI, 77);
+ xapic_write_reg(APIC_EOI, 77);
}
static void sender_guest_code(struct test_data_page *data)
@@ -179,7 +138,7 @@ static void sender_guest_code(struct test_data_page *data)
uint64_t tsc_start;
verify_apic_base_addr();
- enable_xapic();
+ xapic_enable();
/*
* Init interrupt command register for sending IPIs
@@ -206,8 +165,8 @@ static void sender_guest_code(struct test_data_page *data)
* First IPI can be sent unconditionally because halter vCPU
* starts earlier.
*/
- write_apic_reg(APIC_ICR2, icr2_val);
- write_apic_reg(APIC_ICR, icr_val);
+ xapic_write_reg(APIC_ICR2, icr2_val);
+ xapic_write_reg(APIC_ICR, icr_val);
data->ipis_sent++;
/*
@@ -462,13 +421,13 @@ int main(int argc, char *argv[])
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID);
- vm_handle_exception(vm, IPI_VECTOR, guest_ipi_handler);
+ vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
- virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA, 0);
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code);
- test_data_page_vaddr = vm_vaddr_alloc(vm, 0x1000, 0x1000, 0, 0);
+ test_data_page_vaddr = vm_vaddr_alloc_page(vm);
data =
(struct test_data_page *)addr_gva2hva(vm, test_data_page_vaddr);
memset(data, 0, sizeof(*data));
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 1f4a0599683c..eda0d2a51224 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -14,7 +14,6 @@
#include <stdint.h>
#include <time.h>
#include <sched.h>
-#include <sys/syscall.h>
#define VCPU_ID 5
@@ -98,20 +97,6 @@ static void guest_code(void)
GUEST_DONE();
}
-static long get_run_delay(void)
-{
- char path[64];
- long val[2];
- FILE *fp;
-
- sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
- fp = fopen(path, "r");
- fscanf(fp, "%ld %ld ", &val[0], &val[1]);
- fclose(fp);
-
- return val[1];
-}
-
static int cmp_timespec(struct timespec *a, struct timespec *b)
{
if (a->tv_sec > b->tv_sec)
@@ -146,7 +131,7 @@ int main(int argc, char *argv[])
/* Map a region for the shared_info page */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
- virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0);
+ virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);
struct kvm_xen_hvm_config hvmc = {
.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
index 8389e0bfd711..adc94452b57c 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -103,7 +103,7 @@ int main(int argc, char *argv[])
/* Map a region for the hypercall pages */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
- virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2, 0);
+ virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2);
for (;;) {
volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 0af84ad48aa7..fe7ee2b0f29c 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -48,6 +48,7 @@ ARCH ?= $(SUBARCH)
# When local build is done, headers are installed in the default
# INSTALL_HDR_PATH usr/include.
.PHONY: khdr
+.NOTPARALLEL:
khdr:
ifndef KSFT_KHDR_INSTALL_DONE
ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
@@ -100,6 +101,7 @@ define INSTALL_RULE
$(eval INSTALL_LIST = $(TEST_CUSTOM_PROGS)) $(INSTALL_SINGLE_RULE)
$(eval INSTALL_LIST = $(TEST_GEN_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE)
$(eval INSTALL_LIST = $(TEST_GEN_FILES)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(wildcard config settings)) $(INSTALL_SINGLE_RULE)
endef
install: all
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index a105f094676e..ee71fc99d5b5 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -4,6 +4,6 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
-TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh strscpy.sh
+TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh strscpy.sh
include ../lib.mk
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index b80ee3f6e265..645839b50b0a 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -1,4 +1,5 @@
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_PRIME_NUMBERS=m
CONFIG_TEST_STRSCPY=m
diff --git a/tools/testing/selftests/lib/scanf.sh b/tools/testing/selftests/lib/scanf.sh
new file mode 100755
index 000000000000..b59b8ba561c3
--- /dev/null
+++ b/tools/testing/selftests/lib/scanf.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Tests the scanf infrastructure using test_scanf kernel module.
+$(dirname $0)/../kselftest/module.sh "scanf" test_scanf
diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index d874990e442b..38edea25631b 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -1 +1,10 @@
CONFIG_LKDTM=y
+CONFIG_DEBUG_LIST=y
+CONFIG_SLAB_FREELIST_HARDENED=y
+CONFIG_FORTIFY_SOURCE=y
+CONFIG_HARDENED_USERCOPY=y
+# CONFIG_HARDENED_USERCOPY_FALLBACK is not set
+CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
+CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
+CONFIG_UBSAN_BOUNDS=y
+CONFIG_UBSAN_TRAP=y
diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh
index bb7a1775307b..e95e79bd3126 100755
--- a/tools/testing/selftests/lkdtm/run.sh
+++ b/tools/testing/selftests/lkdtm/run.sh
@@ -76,10 +76,14 @@ fi
# Save existing dmesg so we can detect new content below
dmesg > "$DMESG"
-# Most shells yell about signals and we're expecting the "cat" process
-# to usually be killed by the kernel. So we have to run it in a sub-shell
-# and silence errors.
-($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true
+# Since the kernel is likely killing the process writing to the trigger
+# file, it must not be the script's shell itself. i.e. we cannot do:
+# echo "$test" >"$TRIGGER"
+# Instead, use "cat" to take the signal. Since the shell will yell about
+# the signal that killed the subprocess, we must ignore the failure and
+# continue. However we don't silence stderr since there might be other
+# useful details reported there in the case of other unexpected conditions.
+echo "$test" | cat >"$TRIGGER" || true
# Record and dump the results
dmesg | comm --nocheck-order -13 "$DMESG" - > "$LOG" || true
diff --git a/tools/testing/selftests/lkdtm/stack-entropy.sh b/tools/testing/selftests/lkdtm/stack-entropy.sh
index b1b8a5097cbb..1b4d95d575f8 100755
--- a/tools/testing/selftests/lkdtm/stack-entropy.sh
+++ b/tools/testing/selftests/lkdtm/stack-entropy.sh
@@ -30,6 +30,7 @@ rm -f "$log"
# We would expect any functional stack randomization to be at least 5 bits.
if [ "$bits" -lt 5 ]; then
+ echo "Stack entropy is low! Booted without 'randomize_kstack_offset=y'?"
exit 1
else
exit 0
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 11ef159be0fd..09f7bfa383cc 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -7,19 +7,23 @@ EXCEPTION
#EXHAUST_STACK Corrupts memory on failure
#CORRUPT_STACK Crashes entire system on success
#CORRUPT_STACK_STRONG Crashes entire system on success
+ARRAY_BOUNDS
CORRUPT_LIST_ADD list_add corruption
CORRUPT_LIST_DEL list_del corruption
STACK_GUARD_PAGE_LEADING
STACK_GUARD_PAGE_TRAILING
-UNSET_SMEP CR4 bits went missing
+UNSET_SMEP pinned CR4 bits changed:
DOUBLE_FAULT
CORRUPT_PAC
UNALIGNED_LOAD_STORE_WRITE
-#OVERWRITE_ALLOCATION Corrupts memory on failure
+SLAB_LINEAR_OVERFLOW
+VMALLOC_LINEAR_OVERFLOW
#WRITE_AFTER_FREE Corrupts memory on failure
-READ_AFTER_FREE
+READ_AFTER_FREE call trace:|Memory correctly poisoned
#WRITE_BUDDY_AFTER_FREE Corrupts memory on failure
-READ_BUDDY_AFTER_FREE
+READ_BUDDY_AFTER_FREE call trace:|Memory correctly poisoned
+SLAB_INIT_ON_ALLOC Memory appears initialized
+BUDDY_INIT_ON_ALLOC Memory appears initialized
SLAB_FREE_DOUBLE
SLAB_FREE_CROSS
SLAB_FREE_PAGE
@@ -69,4 +73,6 @@ USERCOPY_KERNEL
STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
CFI_FORWARD_PROTO
FORTIFIED_STRSCPY
+FORTIFIED_OBJECT
+FORTIFIED_SUBOBJECT
PPC_SLB_MULTIHIT Recovered
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 74baab83fec3..192a2899bae8 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -56,7 +56,7 @@ static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
static int mfd_assert_reopen_fd(int fd_in)
{
- int r, fd;
+ int fd;
char path[100];
sprintf(path, "/proc/self/fd/%d", fd_in);
diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
index b37585e6aa38..46a97f318f58 100755
--- a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
+++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
@@ -282,7 +282,9 @@ done
#
echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
for memory in `hotpluggable_online_memory`; do
- offline_memory_expect_fail $memory
+ if [ $((RANDOM % 100)) -lt $ratio ]; then
+ offline_memory_expect_fail $memory
+ fi
done
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
index 4e94e566e040..f31205f04ee0 100644
--- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -136,6 +136,10 @@ struct mount_attr {
#define MOUNT_ATTR_IDMAP 0x00100000
#endif
+#ifndef MOUNT_ATTR_NOSYMFOLLOW
+#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
+#endif
+
static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
struct mount_attr *attr, size_t size)
{
@@ -235,6 +239,10 @@ static int prepare_unpriv_mountns(void)
return 0;
}
+#ifndef ST_NOSYMFOLLOW
+#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
+#endif
+
static int read_mnt_flags(const char *path)
{
int ret;
@@ -245,9 +253,9 @@ static int read_mnt_flags(const char *path)
if (ret != 0)
return -EINVAL;
- if (stat.f_flag &
- ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC | ST_NOATIME |
- ST_NODIRATIME | ST_RELATIME | ST_SYNCHRONOUS | ST_MANDLOCK))
+ if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC |
+ ST_NOATIME | ST_NODIRATIME | ST_RELATIME |
+ ST_SYNCHRONOUS | ST_MANDLOCK | ST_NOSYMFOLLOW))
return -EINVAL;
mnt_flags = 0;
@@ -269,6 +277,8 @@ static int read_mnt_flags(const char *path)
mnt_flags |= MS_SYNCHRONOUS;
if (stat.f_flag & ST_MANDLOCK)
mnt_flags |= ST_MANDLOCK;
+ if (stat.f_flag & ST_NOSYMFOLLOW)
+ mnt_flags |= ST_NOSYMFOLLOW;
return mnt_flags;
}
@@ -368,8 +378,13 @@ static bool mount_setattr_supported(void)
FIXTURE(mount_setattr) {
};
+#define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
+#define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
+
FIXTURE_SETUP(mount_setattr)
{
+ int fd = -EBADF;
+
if (!mount_setattr_supported())
SKIP(return, "mount_setattr syscall not supported");
@@ -412,6 +427,11 @@ FIXTURE_SETUP(mount_setattr)
ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
+
+ fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
+ ASSERT_GT(fd, 0);
+ ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
+ ASSERT_EQ(close(fd), 0);
}
FIXTURE_TEARDOWN(mount_setattr)
@@ -1421,4 +1441,66 @@ TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
}
+TEST_F(mount_setattr, mount_attr_nosymfollow)
+{
+ int fd;
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_NOSYMFOLLOW,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+ ASSERT_GT(fd, 0);
+ ASSERT_EQ(close(fd), 0);
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= ST_NOSYMFOLLOW;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, ELOOP);
+
+ attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
+ attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~ST_NOSYMFOLLOW;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+ ASSERT_GT(fd, 0);
+ ASSERT_EQ(close(fd), 0);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/move_mount_set_group/.gitignore b/tools/testing/selftests/move_mount_set_group/.gitignore
new file mode 100644
index 000000000000..f5e339268720
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/.gitignore
@@ -0,0 +1 @@
+move_mount_set_group_test
diff --git a/tools/testing/selftests/move_mount_set_group/Makefile b/tools/testing/selftests/move_mount_set_group/Makefile
new file mode 100644
index 000000000000..80c2d86812b0
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for mount selftests.
+CFLAGS = -g -I../../../../usr/include/ -Wall -O2
+
+TEST_GEN_FILES += move_mount_set_group_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/move_mount_set_group/config b/tools/testing/selftests/move_mount_set_group/config
new file mode 100644
index 000000000000..416bd53ce982
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
new file mode 100644
index 000000000000..860198f83a53
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <sys/syscall.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef CLONE_NEWNS
+#define CLONE_NEWNS 0x00020000
+#endif
+
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER 0x10000000
+#endif
+
+#ifndef MS_SHARED
+#define MS_SHARED (1 << 20)
+#endif
+
+#ifndef MS_PRIVATE
+#define MS_PRIVATE (1<<18)
+#endif
+
+#ifndef MOVE_MOUNT_SET_GROUP
+#define MOVE_MOUNT_SET_GROUP 0x00000100
+#endif
+
+#ifndef MOVE_MOUNT_F_EMPTY_PATH
+#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004
+#endif
+
+#ifndef MOVE_MOUNT_T_EMPTY_PATH
+#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040
+#endif
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static int write_file(const char *path, const void *buf, size_t count)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
+ if (fd < 0)
+ return -1;
+
+ ret = write_nointr(fd, buf, count);
+ close(fd);
+ if (ret < 0 || (size_t)ret != count)
+ return -1;
+
+ return 0;
+}
+
+static int create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+ char map[100];
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER))
+ return -1;
+
+ if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
+ errno != ENOENT)
+ return -1;
+
+ snprintf(map, sizeof(map), "0 %d 1", uid);
+ if (write_file("/proc/self/uid_map", map, strlen(map)))
+ return -1;
+
+
+ snprintf(map, sizeof(map), "0 %d 1", gid);
+ if (write_file("/proc/self/gid_map", map, strlen(map)))
+ return -1;
+
+ if (setgid(0))
+ return -1;
+
+ if (setuid(0))
+ return -1;
+
+ return 0;
+}
+
+static int prepare_unpriv_mountns(void)
+{
+ if (create_and_enter_userns())
+ return -1;
+
+ if (unshare(CLONE_NEWNS))
+ return -1;
+
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+ return -1;
+
+ return 0;
+}
+
+static char *get_field(char *src, int nfields)
+{
+ int i;
+ char *p = src;
+
+ for (i = 0; i < nfields; i++) {
+ while (*p && *p != ' ' && *p != '\t')
+ p++;
+
+ if (!*p)
+ break;
+
+ p++;
+ }
+
+ return p;
+}
+
+static void null_endofword(char *word)
+{
+ while (*word && *word != ' ' && *word != '\t')
+ word++;
+ *word = '\0';
+}
+
+static bool is_shared_mount(const char *path)
+{
+ size_t len = 0;
+ char *line = NULL;
+ FILE *f = NULL;
+
+ f = fopen("/proc/self/mountinfo", "re");
+ if (!f)
+ return false;
+
+ while (getline(&line, &len, f) != -1) {
+ char *opts, *target;
+
+ target = get_field(line, 4);
+ if (!target)
+ continue;
+
+ opts = get_field(target, 2);
+ if (!opts)
+ continue;
+
+ null_endofword(target);
+
+ if (strcmp(target, path) != 0)
+ continue;
+
+ null_endofword(opts);
+ if (strstr(opts, "shared:"))
+ return true;
+ }
+
+ free(line);
+ fclose(f);
+
+ return false;
+}
+
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
+
+#define SET_GROUP_FROM "/tmp/move_mount_set_group_supported_from"
+#define SET_GROUP_TO "/tmp/move_mount_set_group_supported_to"
+
+static int move_mount_set_group_supported(void)
+{
+ int ret;
+
+ if (mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"))
+ return -1;
+
+ if (mount(NULL, "/tmp", NULL, MS_PRIVATE, 0))
+ return -1;
+
+ if (mkdir(SET_GROUP_FROM, 0777))
+ return -1;
+
+ if (mkdir(SET_GROUP_TO, 0777))
+ return -1;
+
+ if (mount("testing", SET_GROUP_FROM, "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"))
+ return -1;
+
+ if (mount(SET_GROUP_FROM, SET_GROUP_TO, NULL, MS_BIND, NULL))
+ return -1;
+
+ if (mount(NULL, SET_GROUP_FROM, NULL, MS_SHARED, 0))
+ return -1;
+
+ ret = syscall(SYS_move_mount, AT_FDCWD, SET_GROUP_FROM,
+ AT_FDCWD, SET_GROUP_TO, MOVE_MOUNT_SET_GROUP);
+ umount2("/tmp", MNT_DETACH);
+
+ return ret < 0 ? false : true;
+}
+
+FIXTURE(move_mount_set_group) {
+};
+
+#define SET_GROUP_A "/tmp/A"
+
+FIXTURE_SETUP(move_mount_set_group)
+{
+ int ret;
+
+ ASSERT_EQ(prepare_unpriv_mountns(), 0);
+
+ ret = move_mount_set_group_supported();
+ ASSERT_GE(ret, 0);
+ if (!ret)
+ SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
+
+ umount2("/tmp", MNT_DETACH);
+
+ ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir(SET_GROUP_A, 0777), 0);
+
+ ASSERT_EQ(mount("testing", SET_GROUP_A, "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+}
+
+FIXTURE_TEARDOWN(move_mount_set_group)
+{
+ int ret;
+
+ ret = move_mount_set_group_supported();
+ ASSERT_GE(ret, 0);
+ if (!ret)
+ SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
+
+ umount2("/tmp", MNT_DETACH);
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+ void *stack;
+
+ stack = malloc(__STACK_SIZE);
+ if (!stack)
+ return -ENOMEM;
+
+#ifdef __ia64__
+ return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+ return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+struct child_args {
+ int unsfd;
+ int mntnsfd;
+ bool shared;
+ int mntfd;
+};
+
+static int get_nestedns_mount_cb(void *data)
+{
+ struct child_args *ca = (struct child_args *)data;
+ int ret;
+
+ ret = prepare_unpriv_mountns();
+ if (ret)
+ return 1;
+
+ if (ca->shared) {
+ ret = mount(NULL, SET_GROUP_A, NULL, MS_SHARED, 0);
+ if (ret)
+ return 1;
+ }
+
+ ret = open("/proc/self/ns/user", O_RDONLY);
+ if (ret < 0)
+ return 1;
+ ca->unsfd = ret;
+
+ ret = open("/proc/self/ns/mnt", O_RDONLY);
+ if (ret < 0)
+ return 1;
+ ca->mntnsfd = ret;
+
+ ret = open(SET_GROUP_A, O_RDONLY);
+ if (ret < 0)
+ return 1;
+ ca->mntfd = ret;
+
+ return 0;
+}
+
+TEST_F(move_mount_set_group, complex_sharing_copying)
+{
+ struct child_args ca_from = {
+ .shared = true,
+ };
+ struct child_args ca_to = {
+ .shared = false,
+ };
+ pid_t pid;
+ int ret;
+
+ ret = move_mount_set_group_supported();
+ ASSERT_GE(ret, 0);
+ if (!ret)
+ SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
+
+ pid = do_clone(get_nestedns_mount_cb, (void *)&ca_from, CLONE_VFORK |
+ CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
+ ASSERT_EQ(wait_for_pid(pid), 0);
+
+ pid = do_clone(get_nestedns_mount_cb, (void *)&ca_to, CLONE_VFORK |
+ CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
+ ASSERT_EQ(wait_for_pid(pid), 0);
+
+ ASSERT_EQ(syscall(SYS_move_mount, ca_from.mntfd, "",
+ ca_to.mntfd, "", MOVE_MOUNT_SET_GROUP
+ | MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH),
+ 0);
+
+ ASSERT_EQ(setns(ca_to.mntnsfd, CLONE_NEWNS), 0);
+ ASSERT_EQ(is_shared_mount(SET_GROUP_A), 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c
index 57b505cb1561..162c41e9bcae 100644
--- a/tools/testing/selftests/nci/nci_dev.c
+++ b/tools/testing/selftests/nci/nci_dev.c
@@ -57,6 +57,29 @@ const __u8 nci_init_rsp_v2[] = {0x40, 0x01, 0x1c, 0x00, 0x1a, 0x7e, 0x06,
const __u8 nci_rf_disc_map_rsp[] = {0x41, 0x00, 0x01, 0x00};
const __u8 nci_rf_disc_rsp[] = {0x41, 0x03, 0x01, 0x00};
const __u8 nci_rf_deact_rsp[] = {0x41, 0x06, 0x01, 0x00};
+const __u8 nci_rf_deact_ntf[] = {0x61, 0x06, 0x02, 0x00, 0x00};
+const __u8 nci_rf_activate_ntf[] = {0x61, 0x05, 0x1D, 0x01, 0x02, 0x04, 0x00,
+ 0xFF, 0xFF, 0x0C, 0x44, 0x03, 0x07, 0x04,
+ 0x62, 0x26, 0x11, 0x80, 0x1D, 0x80, 0x01,
+ 0x20, 0x00, 0x00, 0x00, 0x06, 0x05, 0x75,
+ 0x77, 0x81, 0x02, 0x80};
+const __u8 nci_t4t_select_cmd[] = {0x00, 0x00, 0x0C, 0x00, 0xA4, 0x04, 0x00,
+ 0x07, 0xD2, 0x76, 0x00, 0x00, 0x85, 0x01, 0x01};
+const __u8 nci_t4t_select_cmd2[] = {0x00, 0x00, 0x07, 0x00, 0xA4, 0x00, 0x0C, 0x02,
+ 0xE1, 0x03};
+const __u8 nci_t4t_select_cmd3[] = {0x00, 0x00, 0x07, 0x00, 0xA4, 0x00, 0x0C, 0x02,
+ 0xE1, 0x04};
+const __u8 nci_t4t_read_cmd[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x00, 0x0F};
+const __u8 nci_t4t_read_rsp[] = {0x00, 0x00, 0x11, 0x00, 0x0F, 0x20, 0x00, 0x3B,
+ 0x00, 0x34, 0x04, 0x06, 0xE1, 0x04, 0x08, 0x00,
+ 0x00, 0x00, 0x90, 0x00};
+const __u8 nci_t4t_read_cmd2[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x00, 0x02};
+const __u8 nci_t4t_read_rsp2[] = {0x00, 0x00, 0x04, 0x00, 0x0F, 0x90, 0x00};
+const __u8 nci_t4t_read_cmd3[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x02, 0x0F};
+const __u8 nci_t4t_read_rsp3[] = {0x00, 0x00, 0x11, 0xD1, 0x01, 0x0B, 0x54, 0x02,
+ 0x65, 0x6E, 0x4E, 0x46, 0x43, 0x20, 0x54, 0x45,
+ 0x53, 0x54, 0x90, 0x00};
+const __u8 nci_t4t_rsp_ok[] = {0x00, 0x00, 0x02, 0x90, 0x00};
struct msgtemplate {
struct nlmsghdr n;
@@ -87,7 +110,7 @@ error:
static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
__u8 genl_cmd, int nla_num, __u16 nla_type[],
- void *nla_data[], int nla_len[])
+ void *nla_data[], int nla_len[], __u16 flags)
{
struct sockaddr_nl nladdr;
struct msgtemplate msg;
@@ -98,7 +121,7 @@ static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
msg.n.nlmsg_type = nlmsg_type;
- msg.n.nlmsg_flags = NLM_F_REQUEST;
+ msg.n.nlmsg_flags = flags;
msg.n.nlmsg_seq = 0;
msg.n.nlmsg_pid = nlmsg_pid;
msg.g.cmd = genl_cmd;
@@ -110,11 +133,11 @@ static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
na->nla_type = nla_type[cnt];
na->nla_len = nla_len[cnt] + NLA_HDRLEN;
- if (nla_len > 0)
+ if (nla_len[cnt] > 0)
memcpy(NLA_DATA(na), nla_data[cnt], nla_len[cnt]);
- msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
- prv_len = na->nla_len;
+ prv_len = NLA_ALIGN(nla_len[cnt]) + NLA_HDRLEN;
+ msg.n.nlmsg_len += prv_len;
}
buf = (char *)&msg;
@@ -146,11 +169,11 @@ static int send_get_nfc_family(int sd, __u32 pid)
nla_get_family_data = family_name;
return send_cmd_mt_nla(sd, GENL_ID_CTRL, pid, CTRL_CMD_GETFAMILY,
- 1, &nla_get_family_type,
- &nla_get_family_data, &nla_get_family_len);
+ 1, &nla_get_family_type, &nla_get_family_data,
+ &nla_get_family_len, NLM_F_REQUEST);
}
-static int get_family_id(int sd, __u32 pid)
+static int get_family_id(int sd, __u32 pid, __u32 *event_group)
{
struct {
struct nlmsghdr n;
@@ -158,8 +181,9 @@ static int get_family_id(int sd, __u32 pid)
char buf[512];
} ans;
struct nlattr *na;
- int rep_len;
+ int resp_len;
__u16 id;
+ int len;
int rc;
rc = send_get_nfc_family(sd, pid);
@@ -167,17 +191,49 @@ static int get_family_id(int sd, __u32 pid)
if (rc < 0)
return 0;
- rep_len = recv(sd, &ans, sizeof(ans), 0);
+ resp_len = recv(sd, &ans, sizeof(ans), 0);
- if (ans.n.nlmsg_type == NLMSG_ERROR || rep_len < 0 ||
- !NLMSG_OK(&ans.n, rep_len))
+ if (ans.n.nlmsg_type == NLMSG_ERROR || resp_len < 0 ||
+ !NLMSG_OK(&ans.n, resp_len))
return 0;
+ len = 0;
+ resp_len = GENLMSG_PAYLOAD(&ans.n);
na = (struct nlattr *)GENLMSG_DATA(&ans);
- na = (struct nlattr *)((char *)na + NLA_ALIGN(na->nla_len));
- if (na->nla_type == CTRL_ATTR_FAMILY_ID)
- id = *(__u16 *)NLA_DATA(na);
+ while (len < resp_len) {
+ len += NLA_ALIGN(na->nla_len);
+ if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
+ id = *(__u16 *)NLA_DATA(na);
+ } else if (na->nla_type == CTRL_ATTR_MCAST_GROUPS) {
+ struct nlattr *nested_na;
+ struct nlattr *group_na;
+ int group_attr_len;
+ int group_attr;
+
+ nested_na = (struct nlattr *)((char *)na + NLA_HDRLEN);
+ group_na = (struct nlattr *)((char *)nested_na + NLA_HDRLEN);
+ group_attr_len = 0;
+
+ for (group_attr = CTRL_ATTR_MCAST_GRP_UNSPEC;
+ group_attr < CTRL_ATTR_MCAST_GRP_MAX; group_attr++) {
+ if (group_na->nla_type == CTRL_ATTR_MCAST_GRP_ID) {
+ *event_group = *(__u32 *)((char *)group_na +
+ NLA_HDRLEN);
+ break;
+ }
+
+ group_attr_len += NLA_ALIGN(group_na->nla_len) +
+ NLA_HDRLEN;
+ if (group_attr_len >= nested_na->nla_len)
+ break;
+
+ group_na = (struct nlattr *)((char *)group_na +
+ NLA_ALIGN(group_na->nla_len));
+ }
+ }
+ na = (struct nlattr *)(GENLMSG_DATA(&ans) + len);
+ }
return id;
}
@@ -189,12 +245,12 @@ static int send_cmd_with_idx(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
int nla_len = 4;
return send_cmd_mt_nla(sd, nlmsg_type, nlmsg_pid, genl_cmd, 1,
- &nla_type, &nla_data, &nla_len);
+ &nla_type, &nla_data, &nla_len, NLM_F_REQUEST);
}
static int get_nci_devid(int sd, __u16 fid, __u32 pid, int dev_id, struct msgtemplate *msg)
{
- int rc, rep_len;
+ int rc, resp_len;
rc = send_cmd_with_idx(sd, fid, pid, NFC_CMD_GET_DEVICE, dev_id);
if (rc < 0) {
@@ -202,14 +258,14 @@ static int get_nci_devid(int sd, __u16 fid, __u32 pid, int dev_id, struct msgtem
goto error;
}
- rep_len = recv(sd, msg, sizeof(*msg), 0);
- if (rep_len < 0) {
+ resp_len = recv(sd, msg, sizeof(*msg), 0);
+ if (resp_len < 0) {
rc = -2;
goto error;
}
if (msg->n.nlmsg_type == NLMSG_ERROR ||
- !NLMSG_OK(&msg->n, rep_len)) {
+ !NLMSG_OK(&msg->n, resp_len)) {
rc = -3;
goto error;
}
@@ -222,21 +278,21 @@ error:
static __u8 get_dev_enable_state(struct msgtemplate *msg)
{
struct nlattr *na;
- int rep_len;
+ int resp_len;
int len;
- rep_len = GENLMSG_PAYLOAD(&msg->n);
+ resp_len = GENLMSG_PAYLOAD(&msg->n);
na = (struct nlattr *)GENLMSG_DATA(msg);
len = 0;
- while (len < rep_len) {
+ while (len < resp_len) {
len += NLA_ALIGN(na->nla_len);
if (na->nla_type == NFC_ATTR_DEVICE_POWERED)
return *(char *)NLA_DATA(na);
na = (struct nlattr *)(GENLMSG_DATA(msg) + len);
}
- return rep_len;
+ return resp_len;
}
FIXTURE(NCI) {
@@ -270,8 +326,7 @@ static void *virtual_dev_open(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_reset_cmd))
@@ -280,8 +335,7 @@ static void *virtual_dev_open(void *data)
goto error;
write(dev_fd, nci_reset_rsp, sizeof(nci_reset_rsp));
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_init_cmd))
@@ -290,8 +344,7 @@ static void *virtual_dev_open(void *data)
goto error;
write(dev_fd, nci_init_rsp, sizeof(nci_init_rsp));
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_rf_disc_map_cmd))
@@ -313,8 +366,7 @@ static void *virtual_dev_open_v2(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_reset_cmd))
@@ -324,8 +376,7 @@ static void *virtual_dev_open_v2(void *data)
write(dev_fd, nci_reset_rsp_v2, sizeof(nci_reset_rsp_v2));
write(dev_fd, nci_reset_ntf, sizeof(nci_reset_ntf));
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_init_cmd_v2))
@@ -334,8 +385,7 @@ static void *virtual_dev_open_v2(void *data)
goto error;
write(dev_fd, nci_init_rsp_v2, sizeof(nci_init_rsp_v2));
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_rf_disc_map_cmd))
@@ -353,6 +403,7 @@ FIXTURE_SETUP(NCI)
{
struct msgtemplate msg;
pthread_t thread_t;
+ __u32 event_group;
int status;
int rc;
@@ -364,12 +415,16 @@ FIXTURE_SETUP(NCI)
ASSERT_NE(self->sd, -1);
self->pid = getpid();
- self->fid = get_family_id(self->sd, self->pid);
+ self->fid = get_family_id(self->sd, self->pid, &event_group);
ASSERT_NE(self->fid, -1);
self->virtual_nci_fd = open("/dev/virtual_nci", O_RDWR);
ASSERT_GT(self->virtual_nci_fd, -1);
+ rc = setsockopt(self->sd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &event_group,
+ sizeof(event_group));
+ ASSERT_NE(rc, -1);
+
rc = ioctl(self->virtual_nci_fd, IOCTL_GET_NCIDEV_IDX, &self->dev_idex);
ASSERT_EQ(rc, 0);
@@ -402,8 +457,7 @@ static void *virtual_deinit(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_reset_cmd))
@@ -425,8 +479,7 @@ static void *virtual_deinit_v2(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_reset_cmd))
@@ -489,16 +542,14 @@ static void *virtual_poll_start(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_rf_discovery_cmd))
goto error;
if (memcmp(nci_rf_discovery_cmd, buf, len))
goto error;
- write(dev_fd, nci_rf_disc_rsp, sizeof(nci_rf_disc_rsp))
- ;
+ write(dev_fd, nci_rf_disc_rsp, sizeof(nci_rf_disc_rsp));
return (void *)0;
error:
@@ -513,8 +564,7 @@ static void *virtual_poll_stop(void *data)
dev_fd = *(int *)data;
- while ((len = read(dev_fd, buf, 258)) == 0)
- ;
+ len = read(dev_fd, buf, 258);
if (len <= 0)
goto error;
if (len != sizeof(nci_rf_deact_cmd))
@@ -528,38 +578,282 @@ error:
return (void *)-1;
}
-TEST_F(NCI, start_poll)
+int start_polling(int dev_idx, int proto, int virtual_fd, int sd, int fid, int pid)
{
__u16 nla_start_poll_type[2] = {NFC_ATTR_DEVICE_INDEX,
NFC_ATTR_PROTOCOLS};
- void *nla_start_poll_data[2] = {&self->dev_idex, &self->proto};
+ void *nla_start_poll_data[2] = {&dev_idx, &proto};
int nla_start_poll_len[2] = {4, 4};
pthread_t thread_t;
int status;
int rc;
rc = pthread_create(&thread_t, NULL, virtual_poll_start,
- (void *)&self->virtual_nci_fd);
- ASSERT_GT(rc, -1);
+ (void *)&virtual_fd);
+ if (rc < 0)
+ return rc;
- rc = send_cmd_mt_nla(self->sd, self->fid, self->pid,
- NFC_CMD_START_POLL, 2, nla_start_poll_type,
- nla_start_poll_data, nla_start_poll_len);
- EXPECT_EQ(rc, 0);
+ rc = send_cmd_mt_nla(sd, fid, pid, NFC_CMD_START_POLL, 2, nla_start_poll_type,
+ nla_start_poll_data, nla_start_poll_len, NLM_F_REQUEST);
+ if (rc != 0)
+ return rc;
pthread_join(thread_t, (void **)&status);
- ASSERT_EQ(status, 0);
+ return status;
+}
+
+int stop_polling(int dev_idx, int virtual_fd, int sd, int fid, int pid)
+{
+ pthread_t thread_t;
+ int status;
+ int rc;
rc = pthread_create(&thread_t, NULL, virtual_poll_stop,
- (void *)&self->virtual_nci_fd);
- ASSERT_GT(rc, -1);
+ (void *)&virtual_fd);
+ if (rc < 0)
+ return rc;
- rc = send_cmd_with_idx(self->sd, self->fid, self->pid,
- NFC_CMD_STOP_POLL, self->dev_idex);
- EXPECT_EQ(rc, 0);
+ rc = send_cmd_with_idx(sd, fid, pid,
+ NFC_CMD_STOP_POLL, dev_idx);
+ if (rc != 0)
+ return rc;
+
+ pthread_join(thread_t, (void **)&status);
+ return status;
+}
+
+TEST_F(NCI, start_poll)
+{
+ int status;
+
+ status = start_polling(self->dev_idex, self->proto, self->virtual_nci_fd,
+ self->sd, self->fid, self->pid);
+ EXPECT_EQ(status, 0);
+
+ status = stop_polling(self->dev_idex, self->virtual_nci_fd, self->sd,
+ self->fid, self->pid);
+ EXPECT_EQ(status, 0);
+}
+
+int get_taginfo(int dev_idx, int sd, int fid, int pid)
+{
+ struct {
+ struct nlmsghdr n;
+ struct genlmsghdr g;
+ char buf[512];
+ } ans;
+
+ struct nlattr *na;
+ __u32 protocol;
+ int targetidx;
+ __u8 sel_res;
+ int resp_len;
+ int len;
+
+ __u16 tagid_type;
+ void *tagid_type_data;
+ int tagid_len;
+
+ tagid_type = NFC_ATTR_DEVICE_INDEX;
+ tagid_type_data = &dev_idx;
+ tagid_len = 4;
+
+ send_cmd_mt_nla(sd, fid, pid, NFC_CMD_GET_TARGET, 1, &tagid_type,
+ &tagid_type_data, &tagid_len, NLM_F_REQUEST | NLM_F_DUMP);
+ resp_len = recv(sd, &ans, sizeof(ans), 0);
+ if (ans.n.nlmsg_type == NLMSG_ERROR || resp_len < 0 ||
+ !NLMSG_OK(&ans.n, resp_len))
+ return -1;
+
+ resp_len = GENLMSG_PAYLOAD(&ans.n);
+ na = (struct nlattr *)GENLMSG_DATA(&ans);
+
+ len = 0;
+ targetidx = -1;
+ protocol = -1;
+ sel_res = -1;
+
+ while (len < resp_len) {
+ len += NLA_ALIGN(na->nla_len);
+
+ if (na->nla_type == NFC_ATTR_TARGET_INDEX)
+ targetidx = *(int *)((char *)na + NLA_HDRLEN);
+ else if (na->nla_type == NFC_ATTR_TARGET_SEL_RES)
+ sel_res = *(__u8 *)((char *)na + NLA_HDRLEN);
+ else if (na->nla_type == NFC_ATTR_PROTOCOLS)
+ protocol = *(__u32 *)((char *)na + NLA_HDRLEN);
+
+ na = (struct nlattr *)(GENLMSG_DATA(&ans) + len);
+ }
+
+ if (targetidx == -1 || sel_res != 0x20 || protocol != NFC_PROTO_ISO14443_MASK)
+ return -1;
+
+ return targetidx;
+}
+
+int connect_socket(int dev_idx, int target_idx)
+{
+ struct sockaddr_nfc addr;
+ int sock;
+ int err = 0;
+
+ sock = socket(AF_NFC, SOCK_SEQPACKET, NFC_SOCKPROTO_RAW);
+ if (sock == -1)
+ return -1;
+
+ addr.sa_family = AF_NFC;
+ addr.dev_idx = dev_idx;
+ addr.target_idx = target_idx;
+ addr.nfc_protocol = NFC_PROTO_ISO14443;
+ err = connect(sock, (struct sockaddr *)&addr, sizeof(addr));
+ if (err) {
+ close(sock);
+ return -1;
+ }
+
+ return sock;
+}
+
+int connect_tag(int dev_idx, int virtual_fd, int sd, int fid, int pid)
+{
+ struct genlmsghdr *genlhdr;
+ struct nlattr *na;
+ char evt_data[255];
+ int target_idx;
+ int resp_len;
+ int evt_dev;
+
+ write(virtual_fd, nci_rf_activate_ntf, sizeof(nci_rf_activate_ntf));
+ resp_len = recv(sd, evt_data, sizeof(evt_data), 0);
+ if (resp_len < 0)
+ return -1;
+
+ genlhdr = (struct genlmsghdr *)((struct nlmsghdr *)evt_data + 1);
+ na = (struct nlattr *)(genlhdr + 1);
+ evt_dev = *(int *)((char *)na + NLA_HDRLEN);
+ if (dev_idx != evt_dev)
+ return -1;
+
+ target_idx = get_taginfo(dev_idx, sd, fid, pid);
+ if (target_idx == -1)
+ return -1;
+ return connect_socket(dev_idx, target_idx);
+}
+
+int read_write_nci_cmd(int nfc_sock, int virtual_fd, const __u8 *cmd, __u32 cmd_len,
+ const __u8 *rsp, __u32 rsp_len)
+{
+ char buf[256];
+ int len;
+
+ send(nfc_sock, &cmd[3], cmd_len - 3, 0);
+ len = read(virtual_fd, buf, cmd_len);
+ if (len < 0 || memcmp(buf, cmd, cmd_len))
+ return -1;
+
+ write(virtual_fd, rsp, rsp_len);
+ len = recv(nfc_sock, buf, rsp_len - 2, 0);
+ if (len < 0 || memcmp(&buf[1], &rsp[3], rsp_len - 3))
+ return -1;
+
+ return 0;
+}
+
+int read_tag(int nfc_sock, int virtual_fd)
+{
+ if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_select_cmd,
+ sizeof(nci_t4t_select_cmd), nci_t4t_rsp_ok,
+ sizeof(nci_t4t_rsp_ok)))
+ return -1;
+
+ if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_select_cmd2,
+ sizeof(nci_t4t_select_cmd2), nci_t4t_rsp_ok,
+ sizeof(nci_t4t_rsp_ok)))
+ return -1;
+
+ if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_read_cmd,
+ sizeof(nci_t4t_read_cmd), nci_t4t_read_rsp,
+ sizeof(nci_t4t_read_rsp)))
+ return -1;
+
+ if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_select_cmd3,
+ sizeof(nci_t4t_select_cmd3), nci_t4t_rsp_ok,
+ sizeof(nci_t4t_rsp_ok)))
+ return -1;
+
+ if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_read_cmd2,
+ sizeof(nci_t4t_read_cmd2), nci_t4t_read_rsp2,
+ sizeof(nci_t4t_read_rsp2)))
+ return -1;
+
+ return read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_read_cmd3,
+ sizeof(nci_t4t_read_cmd3), nci_t4t_read_rsp3,
+ sizeof(nci_t4t_read_rsp3));
+}
+
+static void *virtual_deactivate_proc(void *data)
+{
+ int virtual_fd;
+ char buf[256];
+ int deactcmd_len;
+ int len;
+
+ virtual_fd = *(int *)data;
+ deactcmd_len = sizeof(nci_rf_deact_cmd);
+ len = read(virtual_fd, buf, deactcmd_len);
+ if (len != deactcmd_len || memcmp(buf, nci_rf_deact_cmd, deactcmd_len))
+ return (void *)-1;
+
+ write(virtual_fd, nci_rf_deact_rsp, sizeof(nci_rf_deact_rsp));
+ write(virtual_fd, nci_rf_deact_ntf, sizeof(nci_rf_deact_ntf));
+
+ return (void *)0;
+}
+
+int disconnect_tag(int nfc_sock, int virtual_fd)
+{
+ pthread_t thread_t;
+ char buf[256];
+ int status;
+ int len;
+
+ send(nfc_sock, &nci_t4t_select_cmd3[3], sizeof(nci_t4t_select_cmd3) - 3, 0);
+ len = read(virtual_fd, buf, sizeof(nci_t4t_select_cmd3));
+ if (len < 0 || memcmp(buf, nci_t4t_select_cmd3, sizeof(nci_t4t_select_cmd3)))
+ return -1;
+
+ len = recv(nfc_sock, buf, sizeof(nci_t4t_rsp_ok), 0);
+ if (len != -1)
+ return -1;
+
+ status = pthread_create(&thread_t, NULL, virtual_deactivate_proc,
+ (void *)&virtual_fd);
+
+ close(nfc_sock);
pthread_join(thread_t, (void **)&status);
+ return status;
+}
+
+TEST_F(NCI, t4t_tag_read)
+{
+ int nfc_sock;
+ int status;
+
+ status = start_polling(self->dev_idex, self->proto, self->virtual_nci_fd,
+ self->sd, self->fid, self->pid);
+ EXPECT_EQ(status, 0);
+
+ nfc_sock = connect_tag(self->dev_idex, self->virtual_nci_fd, self->sd,
+ self->fid, self->pid);
+ ASSERT_GT(nfc_sock, -1);
+
+ status = read_tag(nfc_sock, self->virtual_nci_fd);
ASSERT_EQ(status, 0);
+
+ status = disconnect_tag(nfc_sock, self->virtual_nci_fd);
+ EXPECT_EQ(status, 0);
}
TEST_F(NCI, deinit)
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 61ae899cfc17..19deb9cdf72f 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -30,3 +30,4 @@ hwtstamp_config
rxtimestamp
timestamping
txtimestamp
+so_netns_cookie
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3915bb7bfc39..492b273743b4 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -25,19 +25,25 @@ TEST_PROGS += bareudp.sh
TEST_PROGS += unicast_extensions.sh
TEST_PROGS += udpgro_fwd.sh
TEST_PROGS += veth.sh
+TEST_PROGS += ioam6.sh
+TEST_PROGS += gro.sh
+TEST_PROGS += gre_gso.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
-TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
+TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie
TEST_GEN_FILES += tcp_fastopen_backup_key
TEST_GEN_FILES += fin_ack_lat
TEST_GEN_FILES += reuseaddr_ports_exhausted
TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
TEST_GEN_FILES += ipsec
+TEST_GEN_FILES += ioam6_parser
+TEST_GEN_FILES += gro
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_GEN_FILES += toeplitz
TEST_FILES := settings
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
new file mode 100644
index 000000000000..df341648f818
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -0,0 +1,2 @@
+TEST_GEN_PROGS := test_unix_oob
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
new file mode 100644
index 000000000000..3dece8b29253
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <netinet/tcp.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/poll.h>
+
+static int pipefd[2];
+static int signal_recvd;
+static pid_t producer_id;
+static char sock_name[32];
+
+static void sig_hand(int sn, siginfo_t *si, void *p)
+{
+ signal_recvd = sn;
+}
+
+static int set_sig_handler(int signal)
+{
+ struct sigaction sa;
+
+ sa.sa_sigaction = sig_hand;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO | SA_RESTART;
+
+ return sigaction(signal, &sa, NULL);
+}
+
+static void set_filemode(int fd, int set)
+{
+ int flags = fcntl(fd, F_GETFL, 0);
+
+ if (set)
+ flags &= ~O_NONBLOCK;
+ else
+ flags |= O_NONBLOCK;
+ fcntl(fd, F_SETFL, flags);
+}
+
+static void signal_producer(int fd)
+{
+ char cmd;
+
+ cmd = 'S';
+ write(fd, &cmd, sizeof(cmd));
+}
+
+static void wait_for_signal(int fd)
+{
+ char buf[5];
+
+ read(fd, buf, 5);
+}
+
+static void die(int status)
+{
+ fflush(NULL);
+ unlink(sock_name);
+ kill(producer_id, SIGTERM);
+ exit(status);
+}
+
+int is_sioctatmark(int fd)
+{
+ int ans = -1;
+
+ if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) {
+#ifdef DEBUG
+ perror("SIOCATMARK Failed");
+#endif
+ }
+ return ans;
+}
+
+void read_oob(int fd, char *c)
+{
+
+ *c = ' ';
+ if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
+#ifdef DEBUG
+ perror("Reading MSG_OOB Failed");
+#endif
+ }
+}
+
+int read_data(int pfd, char *buf, int size)
+{
+ int len = 0;
+
+ memset(buf, size, '0');
+ len = read(pfd, buf, size);
+#ifdef DEBUG
+ if (len < 0)
+ perror("read failed");
+#endif
+ return len;
+}
+
+static void wait_for_data(int pfd, int event)
+{
+ struct pollfd pfds[1];
+
+ pfds[0].fd = pfd;
+ pfds[0].events = event;
+ poll(pfds, 1, -1);
+}
+
+void producer(struct sockaddr_un *consumer_addr)
+{
+ int cfd;
+ char buf[64];
+ int i;
+
+ memset(buf, 'x', sizeof(buf));
+ cfd = socket(AF_UNIX, SOCK_STREAM, 0);
+
+ wait_for_signal(pipefd[0]);
+ if (connect(cfd, (struct sockaddr *)consumer_addr,
+ sizeof(struct sockaddr)) != 0) {
+ perror("Connect failed");
+ kill(0, SIGTERM);
+ exit(1);
+ }
+
+ for (i = 0; i < 2; i++) {
+ /* Test 1: Test for SIGURG and OOB */
+ wait_for_signal(pipefd[0]);
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '@';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ wait_for_signal(pipefd[0]);
+
+ /* Test 2: Test for OOB being overwitten */
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '%';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '#';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ wait_for_signal(pipefd[0]);
+
+ /* Test 3: Test for SIOCATMARK */
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '@';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '%';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ memset(buf, 'x', sizeof(buf));
+ send(cfd, buf, sizeof(buf), 0);
+
+ wait_for_signal(pipefd[0]);
+
+ /* Test 4: Test for 1byte OOB msg */
+ memset(buf, 'x', sizeof(buf));
+ buf[0] = '@';
+ send(cfd, buf, 1, MSG_OOB);
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ int lfd, pfd;
+ struct sockaddr_un consumer_addr, paddr;
+ socklen_t len = sizeof(consumer_addr);
+ char buf[1024];
+ int on = 0;
+ char oob;
+ int flags;
+ int atmark;
+ char *tmp_file;
+
+ lfd = socket(AF_UNIX, SOCK_STREAM, 0);
+ memset(&consumer_addr, 0, sizeof(consumer_addr));
+ consumer_addr.sun_family = AF_UNIX;
+ sprintf(sock_name, "unix_oob_%d", getpid());
+ unlink(sock_name);
+ strcpy(consumer_addr.sun_path, sock_name);
+
+ if ((bind(lfd, (struct sockaddr *)&consumer_addr,
+ sizeof(consumer_addr))) != 0) {
+ perror("socket bind failed");
+ exit(1);
+ }
+
+ pipe(pipefd);
+
+ listen(lfd, 1);
+
+ producer_id = fork();
+ if (producer_id == 0) {
+ producer(&consumer_addr);
+ exit(0);
+ }
+
+ set_sig_handler(SIGURG);
+ signal_producer(pipefd[1]);
+
+ pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
+ fcntl(pfd, F_SETOWN, getpid());
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 1:
+ * veriyf that SIGURG is
+ * delivered and 63 bytes are
+ * read and oob is '@'
+ */
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ read_oob(pfd, &oob);
+ len = read_data(pfd, buf, 1024);
+ if (!signal_recvd || len != 63 || oob != '@') {
+ fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 2:
+ * Verify that the first OOB is over written by
+ * the 2nd one and the first OOB is returned as
+ * part of the read, and sigurg is received.
+ */
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = 0;
+ while (len < 70)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ read_oob(pfd, &oob);
+ if (!signal_recvd || len != 127 || oob != '#') {
+ fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 3:
+ * verify that 2nd oob over writes
+ * the first one and read breaks at
+ * oob boundary returning 127 bytes
+ * and sigurg is received and atmark
+ * is set.
+ * oob is '%' and second read returns
+ * 64 bytes.
+ */
+ len = 0;
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ while (len < 150)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ read_oob(pfd, &oob);
+
+ if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
+ fprintf(stderr,
+ "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
+ signal_recvd, len, oob, atmark);
+ die(1);
+ }
+
+ signal_recvd = 0;
+
+ len = read_data(pfd, buf, 1024);
+ if (len != 64) {
+ fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 4:
+ * verify that a single byte
+ * oob message is delivered.
+ * set non blocking mode and
+ * check proper error is
+ * returned and sigurg is
+ * received and correct
+ * oob is read.
+ */
+
+ set_filemode(pfd, 0);
+
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = read_data(pfd, buf, 1024);
+ if ((len == -1) && (errno == 11))
+ len = 0;
+
+ read_oob(pfd, &oob);
+
+ if (!signal_recvd || len != 0 || oob != '@') {
+ fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ set_filemode(pfd, 1);
+
+ /* Inline Testing */
+
+ on = 1;
+ if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
+ perror("SO_OOBINLINE");
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 1 -- Inline:
+ * Check that SIGURG is
+ * delivered and 63 bytes are
+ * read and oob is '@'
+ */
+
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = read_data(pfd, buf, 1024);
+
+ if (!signal_recvd || len != 63) {
+ fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
+ signal_recvd, len);
+ die(1);
+ }
+
+ len = read_data(pfd, buf, 1024);
+
+ if (len != 1) {
+ fprintf(stderr,
+ "Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 2 -- Inline:
+ * Verify that the first OOB is over written by
+ * the 2nd one and read breaks correctly on
+ * 2nd OOB boundary with the first OOB returned as
+ * part of the read, and sigurg is delivered and
+ * siocatmark returns true.
+ * next read returns one byte, the oob byte
+ * and siocatmark returns false.
+ */
+ len = 0;
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ while (len < 70)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (len != 127 || atmark != 1 || !signal_recvd) {
+ fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
+ len, atmark);
+ die(1);
+ }
+
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (len != 1 || buf[0] != '#' || atmark == 1) {
+ fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
+ len, buf[0], atmark);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 3 -- Inline:
+ * verify that 2nd oob over writes
+ * the first one and read breaks at
+ * oob boundary returning 127 bytes
+ * and sigurg is received and siocatmark
+ * is true after the read.
+ * subsequent read returns 65 bytes
+ * because of oob which should be '%'.
+ */
+ len = 0;
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ while (len < 126)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (!signal_recvd || len != 127 || !atmark) {
+ fprintf(stderr,
+ "Test 3 Inline failed, sigurg %d len %d data %c\n",
+ signal_recvd, len, buf[0]);
+ die(1);
+ }
+
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (len != 65 || buf[0] != '%' || atmark != 0) {
+ fprintf(stderr,
+ "Test 3.1 Inline failed, len %d oob %c atmark %d\n",
+ len, buf[0], atmark);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 4 -- Inline:
+ * verify that a single
+ * byte oob message is delivered
+ * and read returns one byte, the oob
+ * byte and sigurg is received
+ */
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = read_data(pfd, buf, 1024);
+ if (!signal_recvd || len != 1 || buf[0] != '@') {
+ fprintf(stderr,
+ "Test 4 Inline failed, signal %d len %d data %c\n",
+ signal_recvd, len, buf[0]);
+ die(1);
+ }
+ die(0);
+}
diff --git a/tools/testing/selftests/net/altnames.sh b/tools/testing/selftests/net/altnames.sh
index 4254ddc3f70b..1ef9e4159bba 100755
--- a/tools/testing/selftests/net/altnames.sh
+++ b/tools/testing/selftests/net/altnames.sh
@@ -45,7 +45,7 @@ altnames_test()
check_err $? "Got unexpected long alternative name from link show JSON"
ip link property del $DUMMY_DEV altname $SHORT_NAME
- check_err $? "Failed to add short alternative name"
+ check_err $? "Failed to delete short alternative name"
ip -j -p link show $SHORT_NAME &>/dev/null
check_fail $? "Unexpected success while trying to do link show with deleted short alternative name"
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 614d5477365a..86ab429fe7f3 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,4 +1,5 @@
CONFIG_USER_NS=y
+CONFIG_NET_NS=y
CONFIG_BPF_SYSCALL=y
CONFIG_TEST_BPF=m
CONFIG_NUMA=y
@@ -41,3 +42,5 @@ CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_ACT_MIRRED=m
CONFIG_BAREUDP=m
+CONFIG_IPV6_IOAM6_LWTUNNEL=y
+CONFIG_CRYPTO_SM4=y
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
index 834066d465fc..2b5d6ff87373 100755
--- a/tools/testing/selftests/net/devlink_port_split.py
+++ b/tools/testing/selftests/net/devlink_port_split.py
@@ -18,6 +18,8 @@ import sys
#
+# Kselftest framework requirement - SKIP code is 4
+KSFT_SKIP=4
Port = collections.namedtuple('Port', 'bus_info name')
@@ -239,7 +241,11 @@ def main(cmdline=None):
assert stderr == ""
devs = json.loads(stdout)['dev']
- dev = list(devs.keys())[0]
+ if devs:
+ dev = list(devs.keys())[0]
+ else:
+ print("no devlink device was found, test skipped")
+ sys.exit(KSFT_SKIP)
cmd = "devlink dev show %s" % dev
stdout, stderr = run_command(cmd)
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index a8ad92850e63..3313566ce906 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -37,6 +37,9 @@
#
# server / client nomenclature relative to ns-A
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
VERBOSE=0
NSA_DEV=eth1
@@ -286,6 +289,12 @@ set_sysctl()
run_cmd sysctl -q -w $*
}
+# get sysctl values in NS-A
+get_sysctl()
+{
+ ${NSA_CMD} sysctl -n $*
+}
+
################################################################################
# Setup for tests
@@ -436,10 +445,13 @@ cleanup()
ip -netns ${NSA} link set dev ${NSA_DEV} down
ip -netns ${NSA} link del dev ${NSA_DEV}
+ ip netns pids ${NSA} | xargs kill 2>/dev/null
ip netns del ${NSA}
fi
+ ip netns pids ${NSB} | xargs kill 2>/dev/null
ip netns del ${NSB}
+ ip netns pids ${NSC} | xargs kill 2>/dev/null
ip netns del ${NSC} >/dev/null 2>&1
}
@@ -1000,6 +1012,60 @@ ipv4_tcp_md5()
run_cmd nettest -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET}
log_test $? 1 "MD5: VRF: Device must be a VRF - prefix"
+ test_ipv4_md5_vrf__vrf_server__no_bind_ifindex
+ test_ipv4_md5_vrf__global_server__bind_ifindex0
+}
+
+test_ipv4_md5_vrf__vrf_server__no_bind_ifindex()
+{
+ log_start
+ show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX"
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection"
+
+ log_start
+ show_hint "Binding both the socket and the key is not required but it works"
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection"
+}
+
+test_ipv4_md5_vrf__global_server__bind_ifindex0()
+{
+ # This particular test needs tcp_l3mdev_accept=1 for Global server to accept VRF connections
+ local old_tcp_l3mdev_accept
+ old_tcp_l3mdev_accept=$(get_sysctl net.ipv4.tcp_l3mdev_accept)
+ set_sysctl net.ipv4.tcp_l3mdev_accept=1
+
+ log_start
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection"
+
+ log_start
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection"
+ log_start
+
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection"
+
+ log_start
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection"
+
+ # restore value
+ set_sysctl net.ipv4.tcp_l3mdev_accept="$old_tcp_l3mdev_accept"
}
ipv4_tcp_novrf()
@@ -3879,6 +3945,32 @@ use_case_ping_lla_multi()
log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C"
}
+# Perform IPv{4,6} SNAT on ns-A, and verify TCP connection is successfully
+# established with ns-B.
+use_case_snat_on_vrf()
+{
+ setup "yes"
+
+ local port="12345"
+
+ run_cmd iptables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+ run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+
+ run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} &
+ sleep 1
+ run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port}
+ log_test $? 0 "IPv4 TCP connection over VRF with SNAT"
+
+ run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} &
+ sleep 1
+ run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port}
+ log_test $? 0 "IPv6 TCP connection over VRF with SNAT"
+
+ # Cleanup
+ run_cmd iptables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+ run_cmd ip6tables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+}
+
use_cases()
{
log_section "Use cases"
@@ -3886,6 +3978,8 @@ use_cases()
use_case_br
log_subsection "Ping LLA with multiple interfaces"
use_case_ping_lla_multi
+ log_subsection "SNAT on VRF"
+ use_case_snat_on_vrf
}
################################################################################
@@ -3946,7 +4040,7 @@ fi
which nettest >/dev/null
if [ $? -ne 0 ]; then
echo "'nettest' command not found; skipping tests"
- exit 0
+ exit $ksft_skip
fi
declare -i nfail=0
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 49774a8a7736..0d293391e9a4 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -925,6 +925,14 @@ ipv6_fcnal_runtime()
run_cmd "$IP nexthop add id 86 via 2001:db8:91::2 dev veth1"
run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
+ # route can not use prefsrc with nexthops
+ run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 from 2001:db8:91::1"
+ log_test $? 2 "IPv6 route can not use src routing with external nexthop"
+
+ # check cleanup path on invalid metric
+ run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 congctl lock foo"
+ log_test $? 2 "IPv6 route with invalid metric"
+
# rpfilter and default route
$IP nexthop flush >/dev/null 2>&1
run_cmd "ip netns exec me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP"
@@ -1366,6 +1374,10 @@ ipv4_fcnal_runtime()
run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
log_test $? 2 "Nexthop replace with invalid scope for existing route"
+ # check cleanup path on invalid metric
+ run_cmd "$IP ro add 172.16.101.2/32 nhid 22 congctl lock foo"
+ log_test $? 2 "IPv4 route with invalid metric"
+
#
# add route with nexthop and check traffic
#
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index a93e6b690e06..43ea8407a82e 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -3,6 +3,9 @@
# This test is for checking IPv4 and IPv6 FIB rules API
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
ret=0
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
@@ -238,12 +241,12 @@ run_fibrule_tests()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
# start clean
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index d97bd6889446..72ee644d47bf 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -9,6 +9,7 @@ TEST_PROGS = bridge_igmp.sh \
gre_inner_v4_multipath.sh \
gre_inner_v6_multipath.sh \
gre_multipath.sh \
+ ip6_forward_instats_vrf.sh \
ip6gre_inner_v4_multipath.sh \
ip6gre_inner_v6_multipath.sh \
ipip_flat_gre_key.sh \
diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
new file mode 100755
index 000000000000..a15d21dc035a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
@@ -0,0 +1,364 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution between two paths when using custom hash policy.
+#
+# +--------------------------------+
+# | H1 |
+# | $h1 + |
+# | 198.51.100.{2-253}/24 | |
+# | 2001:db8:1::{2-fd}/64 | |
+# +-------------------------|------+
+# |
+# +-------------------------|-------------------------+
+# | SW1 | |
+# | $rp1 + |
+# | 198.51.100.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | |
+# | $rp11 + + $rp12 |
+# | 192.0.2.1/28 | | 192.0.2.17/28 |
+# | 2001:db8:2::1/64 | | 2001:db8:3::1/64 |
+# +------------------|-------------|------------------+
+# | |
+# +------------------|-------------|------------------+
+# | SW2 | | |
+# | | | |
+# | $rp21 + + $rp22 |
+# | 192.0.2.2/28 192.0.2.18/28 |
+# | 2001:db8:2::2/64 2001:db8:3::2/64 |
+# | |
+# | |
+# | $rp2 + |
+# | 203.0.113.1/24 | |
+# | 2001:db8:4::1/64 | |
+# +-------------------------|-------------------------+
+# |
+# +-------------------------|------+
+# | H2 | |
+# | $h2 + |
+# | 203.0.113.{2-253}/24 |
+# | 2001:db8:4::{2-fd}/64 |
+# +--------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ custom_hash
+"
+
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+ ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+ ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 default
+ ip route del vrf v$h1 default
+ simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+ simple_if_init $rp1 198.51.100.1/24 2001:db8:1::1/64
+ __simple_if_init $rp11 v$rp1 192.0.2.1/28 2001:db8:2::1/64
+ __simple_if_init $rp12 v$rp1 192.0.2.17/28 2001:db8:3::1/64
+
+ ip route add vrf v$rp1 203.0.113.0/24 \
+ nexthop via 192.0.2.2 dev $rp11 \
+ nexthop via 192.0.2.18 dev $rp12
+
+ ip -6 route add vrf v$rp1 2001:db8:4::/64 \
+ nexthop via 2001:db8:2::2 dev $rp11 \
+ nexthop via 2001:db8:3::2 dev $rp12
+}
+
+sw1_destroy()
+{
+ ip -6 route del vrf v$rp1 2001:db8:4::/64
+
+ ip route del vrf v$rp1 203.0.113.0/24
+
+ __simple_if_fini $rp12 192.0.2.17/28 2001:db8:3::1/64
+ __simple_if_fini $rp11 192.0.2.1/28 2001:db8:2::1/64
+ simple_if_fini $rp1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+ simple_if_init $rp2 203.0.113.1/24 2001:db8:4::1/64
+ __simple_if_init $rp21 v$rp2 192.0.2.2/28 2001:db8:2::2/64
+ __simple_if_init $rp22 v$rp2 192.0.2.18/28 2001:db8:3::2/64
+
+ ip route add vrf v$rp2 198.51.100.0/24 \
+ nexthop via 192.0.2.1 dev $rp21 \
+ nexthop via 192.0.2.17 dev $rp22
+
+ ip -6 route add vrf v$rp2 2001:db8:1::/64 \
+ nexthop via 2001:db8:2::1 dev $rp21 \
+ nexthop via 2001:db8:3::1 dev $rp22
+}
+
+sw2_destroy()
+{
+ ip -6 route del vrf v$rp2 2001:db8:1::/64
+
+ ip route del vrf v$rp2 198.51.100.0/24
+
+ __simple_if_fini $rp22 192.0.2.18/28 2001:db8:3::2/64
+ __simple_if_fini $rp21 192.0.2.2/28 2001:db8:2::2/64
+ simple_if_fini $rp2 203.0.113.1/24 2001:db8:4::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 203.0.113.2/24 2001:db8:4::2/64
+ ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+ ip -6 route add vrf v$h2 default via 2001:db8:4::1 dev $h2
+}
+
+h2_destroy()
+{
+ ip -6 route del vrf v$h2 default
+ ip route del vrf v$h2 default
+ simple_if_fini $h2 203.0.113.2/24 2001:db8:4::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+
+ rp1=${NETIFS[p2]}
+
+ rp11=${NETIFS[p3]}
+ rp21=${NETIFS[p4]}
+
+ rp12=${NETIFS[p5]}
+ rp22=${NETIFS[p6]}
+
+ rp2=${NETIFS[p7]}
+
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:4::2
+}
+
+send_src_ipv4()
+{
+ $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec v$h1 \
+ $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1
+ done
+}
+
+send_src_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:4::2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:4::2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+ local field="$1"; shift
+ local balanced="$1"; shift
+ local send_flows="$@"
+
+ RET=0
+
+ local t0_rp11=$(link_stats_tx_packets_get $rp11)
+ local t0_rp12=$(link_stats_tx_packets_get $rp12)
+
+ $send_flows
+
+ local t1_rp11=$(link_stats_tx_packets_get $rp11)
+ local t1_rp12=$(link_stats_tx_packets_get $rp12)
+
+ local d_rp11=$((t1_rp11 - t0_rp11))
+ local d_rp12=$((t1_rp12 - t0_rp12))
+
+ local diff=$((d_rp12 - d_rp11))
+ local sum=$((d_rp11 + d_rp12))
+
+ local pct=$(echo "$diff / $sum * 100" | bc -l)
+ local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+ [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+ ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+ check_err $? "Expected traffic to be $balanced, but it is not"
+
+ log_test "Multipath hash field: $field ($balanced)"
+ log_info "Packets sent on path1 / path2: $d_rp11 / $d_rp12"
+}
+
+custom_hash_v4()
+{
+ log_info "Running IPv4 custom multipath hash tests"
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 3
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0001
+ custom_hash_test "Source IP" "balanced" send_src_ipv4
+ custom_hash_test "Source IP" "unbalanced" send_dst_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0002
+ custom_hash_test "Destination IP" "balanced" send_dst_ipv4
+ custom_hash_test "Destination IP" "unbalanced" send_src_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0010
+ custom_hash_test "Source port" "balanced" send_src_udp4
+ custom_hash_test "Source port" "unbalanced" send_dst_udp4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0020
+ custom_hash_test "Destination port" "balanced" send_dst_udp4
+ custom_hash_test "Destination port" "unbalanced" send_src_udp4
+
+ sysctl_restore net.ipv4.neigh.default.gc_thresh3
+ sysctl_restore net.ipv4.neigh.default.gc_thresh2
+ sysctl_restore net.ipv4.neigh.default.gc_thresh1
+
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+custom_hash_v6()
+{
+ log_info "Running IPv6 custom multipath hash tests"
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 3
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0001
+ custom_hash_test "Source IP" "balanced" send_src_ipv6
+ custom_hash_test "Source IP" "unbalanced" send_dst_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0002
+ custom_hash_test "Destination IP" "balanced" send_dst_ipv6
+ custom_hash_test "Destination IP" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0008
+ custom_hash_test "Flowlabel" "balanced" send_flowlabel
+ custom_hash_test "Flowlabel" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0010
+ custom_hash_test "Source port" "balanced" send_src_udp6
+ custom_hash_test "Source port" "unbalanced" send_dst_udp6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0020
+ custom_hash_test "Destination port" "balanced" send_dst_udp6
+ custom_hash_test "Destination port" "unbalanced" send_src_udp6
+
+ sysctl_restore net.ipv6.neigh.default.gc_thresh3
+ sysctl_restore net.ipv6.neigh.default.gc_thresh2
+ sysctl_restore net.ipv6.neigh.default.gc_thresh1
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+custom_hash()
+{
+ # Test that when the hash policy is set to custom, traffic is
+ # distributed only according to the fields set in the
+ # fib_multipath_hash_fields sysctl.
+ #
+ # Each time set a different field and make sure traffic is only
+ # distributed when the field is changed in the packet stream.
+ custom_hash_v4
+ custom_hash_v6
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 9c12c4fd3afc..2c14a86adaaa 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -1,6 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
##############################################################################
# Defines
@@ -9,15 +12,21 @@ if [[ ! -v DEVLINK_DEV ]]; then
| jq -r '.port | keys[]' | cut -d/ -f-2)
if [ -z "$DEVLINK_DEV" ]; then
echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
- exit 1
+ exit $ksft_skip
fi
if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
echo "SKIP: devlink device's bus is not PCI"
- exit 1
+ exit $ksft_skip
fi
DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
-n | cut -d" " -f3)
+elif [[ ! -z "$DEVLINK_DEV" ]]; then
+ devlink dev show $DEVLINK_DEV &> /dev/null
+ if [ $? -ne 0 ]; then
+ echo "SKIP: devlink device \"$DEVLINK_DEV\" not found"
+ exit $ksft_skip
+ fi
fi
##############################################################################
@@ -26,19 +35,19 @@ fi
devlink help 2>&1 | grep resource &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing devlink resource support"
- exit 1
+ exit $ksft_skip
fi
devlink help 2>&1 | grep trap &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing devlink trap support"
- exit 1
+ exit $ksft_skip
fi
devlink dev help 2>&1 | grep info &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing devlink dev info support"
- exit 1
+ exit $ksft_skip
fi
##############################################################################
@@ -318,6 +327,14 @@ devlink_trap_rx_bytes_get()
| jq '.[][][]["stats"]["rx"]["bytes"]'
}
+devlink_trap_drop_packets_get()
+{
+ local trap_name=$1; shift
+
+ devlink -js trap show $DEVLINK_DEV trap $trap_name \
+ | jq '.[][][]["stats"]["rx"]["dropped"]'
+}
+
devlink_trap_stats_idle_test()
{
local trap_name=$1; shift
@@ -339,6 +356,24 @@ devlink_trap_stats_idle_test()
fi
}
+devlink_trap_drop_stats_idle_test()
+{
+ local trap_name=$1; shift
+ local t0_packets t0_bytes
+
+ t0_packets=$(devlink_trap_drop_packets_get $trap_name)
+
+ sleep 1
+
+ t1_packets=$(devlink_trap_drop_packets_get $trap_name)
+
+ if [[ $t0_packets -eq $t1_packets ]]; then
+ return 0
+ else
+ return 1
+ fi
+}
+
devlink_traps_enable_all()
{
local trap_name
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index b802c14d2950..e5e2fbeca22e 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -39,3 +39,5 @@ NETIF_CREATE=yes
# Timeout (in seconds) before ping exits regardless of how many packets have
# been sent or received
PING_TIMEOUT=5
+# IPv6 traceroute utility name.
+TROUTE6=traceroute6
diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
new file mode 100755
index 000000000000..a73f52efcb6c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
@@ -0,0 +1,456 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution when there are multiple paths between an IPv4 GRE
+# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +--------------------------------+
+# | H1 |
+# | $h1 + |
+# | 198.51.100.{2-253}/24 | |
+# | 2001:db8:1::{2-fd}/64 | |
+# +-------------------------|------+
+# |
+# +-------------------------|------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 198.51.100.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | + g1 (gre) |
+# | loc=192.0.2.1 |
+# | rem=192.0.2.2 --. |
+# | tos=inherit | |
+# | v |
+# | + $ul1 |
+# | | 192.0.2.17/28 |
+# +---------------------|----------------------+
+# |
+# +---------------------|----------------------+
+# | SW2 | |
+# | $ul21 + |
+# | 192.0.2.18/28 | |
+# | | |
+# ! __________________+___ |
+# | / \ |
+# | | | |
+# | + $ul22.111 (vlan) + $ul22.222 (vlan) |
+# | | 192.0.2.33/28 | 192.0.2.49/28 |
+# | | | |
+# +--|----------------------|------------------+
+# | |
+# +--|----------------------|------------------+
+# | | | |
+# | + $ul32.111 (vlan) + $ul32.222 (vlan) |
+# | | 192.0.2.34/28 | 192.0.2.50/28 |
+# | | | |
+# | \__________________+___/ |
+# | | |
+# | | |
+# | $ul31 + |
+# | 192.0.2.65/28 | SW3 |
+# +---------------------|----------------------+
+# |
+# +---------------------|----------------------+
+# | + $ul4 |
+# | ^ 192.0.2.66/28 |
+# | | |
+# | + g2 (gre) | |
+# | loc=192.0.2.2 | |
+# | rem=192.0.2.1 --' |
+# | tos=inherit |
+# | |
+# | $ol4 + |
+# | 203.0.113.1/24 | |
+# | 2001:db8:2::1/64 | SW4 |
+# +-------------------------|------------------+
+# |
+# +-------------------------|------+
+# | | |
+# | $h2 + |
+# | 203.0.113.{2-253}/24 |
+# | 2001:db8:2::{2-fd}/64 H2 |
+# +--------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ custom_hash
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+ ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+ ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 default
+ ip route del vrf v$h1 default
+ simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+ simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64
+ __simple_if_init $ul1 v$ol1 192.0.2.17/28
+
+ tunnel_create g1 gre 192.0.2.1 192.0.2.2 tos inherit dev v$ol1
+ __simple_if_init g1 v$ol1 192.0.2.1/32
+ ip route add vrf v$ol1 192.0.2.2/32 via 192.0.2.18
+
+ ip route add vrf v$ol1 203.0.113.0/24 dev g1
+ ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+ ip -6 route del vrf v$ol1 2001:db8:2::/64
+ ip route del vrf v$ol1 203.0.113.0/24
+
+ ip route del vrf v$ol1 192.0.2.2/32
+ __simple_if_fini g1 192.0.2.1/32
+ tunnel_destroy g1
+
+ __simple_if_fini $ul1 192.0.2.17/28
+ simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+ simple_if_init $ul21 192.0.2.18/28
+ __simple_if_init $ul22 v$ul21
+ vlan_create $ul22 111 v$ul21 192.0.2.33/28
+ vlan_create $ul22 222 v$ul21 192.0.2.49/28
+
+ ip route add vrf v$ul21 192.0.2.1/32 via 192.0.2.17
+ ip route add vrf v$ul21 192.0.2.2/32 \
+ nexthop via 192.0.2.34 \
+ nexthop via 192.0.2.50
+}
+
+sw2_destroy()
+{
+ ip route del vrf v$ul21 192.0.2.2/32
+ ip route del vrf v$ul21 192.0.2.1/32
+
+ vlan_destroy $ul22 222
+ vlan_destroy $ul22 111
+ __simple_if_fini $ul22
+ simple_if_fini $ul21 192.0.2.18/28
+}
+
+sw3_create()
+{
+ simple_if_init $ul31 192.0.2.65/28
+ __simple_if_init $ul32 v$ul31
+ vlan_create $ul32 111 v$ul31 192.0.2.34/28
+ vlan_create $ul32 222 v$ul31 192.0.2.50/28
+
+ ip route add vrf v$ul31 192.0.2.2/32 via 192.0.2.66
+ ip route add vrf v$ul31 192.0.2.1/32 \
+ nexthop via 192.0.2.33 \
+ nexthop via 192.0.2.49
+
+ tc qdisc add dev $ul32 clsact
+ tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+ flower vlan_id 111 action pass
+ tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+ flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+ tc qdisc del dev $ul32 clsact
+
+ ip route del vrf v$ul31 192.0.2.1/32
+ ip route del vrf v$ul31 192.0.2.2/32
+
+ vlan_destroy $ul32 222
+ vlan_destroy $ul32 111
+ __simple_if_fini $ul32
+ simple_if_fini $ul31 192.0.2.65/28
+}
+
+sw4_create()
+{
+ simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64
+ __simple_if_init $ul4 v$ol4 192.0.2.66/28
+
+ tunnel_create g2 gre 192.0.2.2 192.0.2.1 tos inherit dev v$ol4
+ __simple_if_init g2 v$ol4 192.0.2.2/32
+ ip route add vrf v$ol4 192.0.2.1/32 via 192.0.2.65
+
+ ip route add vrf v$ol4 198.51.100.0/24 dev g2
+ ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+ ip -6 route del vrf v$ol4 2001:db8:1::/64
+ ip route del vrf v$ol4 198.51.100.0/24
+
+ ip route del vrf v$ol4 192.0.2.1/32
+ __simple_if_fini g2 192.0.2.2/32
+ tunnel_destroy g2
+
+ __simple_if_fini $ul4 192.0.2.66/28
+ simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64
+ ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+ ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2
+}
+
+h2_destroy()
+{
+ ip -6 route del vrf v$h2 default
+ ip route del vrf v$h2 default
+ simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+
+ ol1=${NETIFS[p2]}
+ ul1=${NETIFS[p3]}
+
+ ul21=${NETIFS[p4]}
+ ul22=${NETIFS[p5]}
+
+ ul32=${NETIFS[p6]}
+ ul31=${NETIFS[p7]}
+
+ ul4=${NETIFS[p8]}
+ ol4=${NETIFS[p9]}
+
+ h2=${NETIFS[p10]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ sw3_create
+ sw4_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw4_destroy
+ sw3_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+send_src_ipv4()
+{
+ $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec v$h1 \
+ $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
+ done
+}
+
+send_src_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+ local field="$1"; shift
+ local balanced="$1"; shift
+ local send_flows="$@"
+
+ RET=0
+
+ local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ $send_flows
+
+ local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+
+ local diff=$((d222 - d111))
+ local sum=$((d111 + d222))
+
+ local pct=$(echo "$diff / $sum * 100" | bc -l)
+ local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+ [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+ ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+ check_err $? "Expected traffic to be $balanced, but it is not"
+
+ log_test "Multipath hash field: $field ($balanced)"
+ log_info "Packets sent on path1 / path2: $d111 / $d222"
+}
+
+custom_hash_v4()
+{
+ log_info "Running IPv4 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv4
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp4
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp4
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp4
+
+ sysctl_restore net.ipv4.neigh.default.gc_thresh3
+ sysctl_restore net.ipv4.neigh.default.gc_thresh2
+ sysctl_restore net.ipv4.neigh.default.gc_thresh1
+}
+
+custom_hash_v6()
+{
+ log_info "Running IPv6 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv6
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0200
+ custom_hash_test "Inner flowlabel" "balanced" send_flowlabel
+ custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp6
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp6
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp6
+
+ sysctl_restore net.ipv6.neigh.default.gc_thresh3
+ sysctl_restore net.ipv6.neigh.default.gc_thresh2
+ sysctl_restore net.ipv6.neigh.default.gc_thresh1
+}
+
+custom_hash()
+{
+ # Test that when the hash policy is set to custom, traffic is
+ # distributed only according to the fields set in the
+ # fib_multipath_hash_fields sysctl.
+ #
+ # Each time set a different field and make sure traffic is only
+ # distributed when the field is changed in the packet stream.
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 3
+
+ custom_hash_v4
+ custom_hash_v6
+
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
new file mode 100755
index 000000000000..9f5b3e2e5e95
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test ipv6 stats on the incoming if when forwarding with VRF
+
+ALL_TESTS="
+ ipv6_ping
+ ipv6_in_too_big_err
+ ipv6_in_hdr_err
+ ipv6_in_addr_err
+ ipv6_in_discard
+"
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 2001:1:1::2/64
+ ip -6 route add vrf v$h1 2001:1:2::/64 via 2001:1:1::1
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 2001:1:2::/64 via 2001:1:1::1
+ simple_if_fini $h1 2001:1:1::2/64
+}
+
+router_create()
+{
+ vrf_create router
+ __simple_if_init $rtr1 router 2001:1:1::1/64
+ __simple_if_init $rtr2 router 2001:1:2::1/64
+ mtu_set $rtr2 1280
+}
+
+router_destroy()
+{
+ mtu_restore $rtr2
+ __simple_if_fini $rtr2 2001:1:2::1/64
+ __simple_if_fini $rtr1 2001:1:1::1/64
+ vrf_destroy router
+}
+
+h2_create()
+{
+ simple_if_init $h2 2001:1:2::2/64
+ ip -6 route add vrf v$h2 2001:1:1::/64 via 2001:1:2::1
+ mtu_set $h2 1280
+}
+
+h2_destroy()
+{
+ mtu_restore $h2
+ ip -6 route del vrf v$h2 2001:1:1::/64 via 2001:1:2::1
+ simple_if_fini $h2 2001:1:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rtr1=${NETIFS[p2]}
+
+ rtr2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+ h1_create
+ router_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ router_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ipv6_in_too_big_err()
+{
+ RET=0
+
+ local t0=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors)
+ local vrf_name=$(master_name_get $h1)
+
+ # Send too big packets
+ ip vrf exec $vrf_name \
+ $PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+
+ local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors)
+ test "$((t1 - t0))" -ne 0
+ check_err $?
+ log_test "Ip6InTooBigErrors"
+}
+
+ipv6_in_hdr_err()
+{
+ RET=0
+
+ local t0=$(ipv6_stats_get $rtr1 Ip6InHdrErrors)
+ local vrf_name=$(master_name_get $h1)
+
+ # Send packets with hop limit 1, easiest with traceroute6 as some ping6
+ # doesn't allow hop limit to be specified
+ ip vrf exec $vrf_name \
+ $TROUTE6 2001:1:2::2 &> /dev/null
+
+ local t1=$(ipv6_stats_get $rtr1 Ip6InHdrErrors)
+ test "$((t1 - t0))" -ne 0
+ check_err $?
+ log_test "Ip6InHdrErrors"
+}
+
+ipv6_in_addr_err()
+{
+ RET=0
+
+ local t0=$(ipv6_stats_get $rtr1 Ip6InAddrErrors)
+ local vrf_name=$(master_name_get $h1)
+
+ # Disable forwarding temporary while sending the packet
+ sysctl -qw net.ipv6.conf.all.forwarding=0
+ ip vrf exec $vrf_name \
+ $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+ sysctl -qw net.ipv6.conf.all.forwarding=1
+
+ local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors)
+ test "$((t1 - t0))" -ne 0
+ check_err $?
+ log_test "Ip6InAddrErrors"
+}
+
+ipv6_in_discard()
+{
+ RET=0
+
+ local t0=$(ipv6_stats_get $rtr1 Ip6InDiscards)
+ local vrf_name=$(master_name_get $h1)
+
+ # Add a policy to discard
+ ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block
+ ip vrf exec $vrf_name \
+ $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+ ip xfrm policy del dst 2001:1:2::2/128 dir fwd
+
+ local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards)
+ test "$((t1 - t0))" -ne 0
+ check_err $?
+ log_test "Ip6InDiscards"
+}
+ipv6_ping()
+{
+ RET=0
+
+ ping6_test $h1 2001:1:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
new file mode 100755
index 000000000000..8fea2c2e0b25
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
@@ -0,0 +1,458 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution when there are multiple paths between an IPv6 GRE
+# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +--------------------------------+
+# | H1 |
+# | $h1 + |
+# | 198.51.100.{2-253}/24 | |
+# | 2001:db8:1::{2-fd}/64 | |
+# +-------------------------|------+
+# |
+# +-------------------------|-------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 198.51.100.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# |+ g1 (ip6gre) |
+# | loc=2001:db8:3::1 |
+# | rem=2001:db8:3::2 -. |
+# | tos=inherit | |
+# | v |
+# | + $ul1 |
+# | | 2001:db8:10::1/64 |
+# +---------------------|-----------------------+
+# |
+# +---------------------|-----------------------+
+# | SW2 | |
+# | $ul21 + |
+# | 2001:db8:10::2/64 | |
+# | | |
+# ! __________________+___ |
+# | / \ |
+# | | | |
+# | + $ul22.111 (vlan) + $ul22.222 (vlan) |
+# | | 2001:db8:11::1/64 | 2001:db8:12::1/64 |
+# | | | |
+# +--|----------------------|-------------------+
+# | |
+# +--|----------------------|-------------------+
+# | | | |
+# | + $ul32.111 (vlan) + $ul32.222 (vlan) |
+# | | 2001:db8:11::2/64 | 2001:db8:12::2/64 |
+# | | | |
+# | \__________________+___/ |
+# | | |
+# | | |
+# | $ul31 + |
+# | 2001:db8:13::1/64 | SW3 |
+# +---------------------|-----------------------+
+# |
+# +---------------------|-----------------------+
+# | + $ul4 |
+# | ^ 2001:db8:13::2/64 |
+# | | |
+# |+ g2 (ip6gre) | |
+# | loc=2001:db8:3::2 | |
+# | rem=2001:db8:3::1 -' |
+# | tos=inherit |
+# | |
+# | $ol4 + |
+# | 203.0.113.1/24 | |
+# | 2001:db8:2::1/64 | SW4 |
+# +-------------------------|-------------------+
+# |
+# +-------------------------|------+
+# | | |
+# | $h2 + |
+# | 203.0.113.{2-253}/24 |
+# | 2001:db8:2::{2-fd}/64 H2 |
+# +--------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ custom_hash
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+ ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+ ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 default
+ ip route del vrf v$h1 default
+ simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+ simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64
+ __simple_if_init $ul1 v$ol1 2001:db8:10::1/64
+
+ tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+ dev v$ol1
+ __simple_if_init g1 v$ol1 2001:db8:3::1/128
+ ip route add vrf v$ol1 2001:db8:3::2/128 via 2001:db8:10::2
+
+ ip route add vrf v$ol1 203.0.113.0/24 dev g1
+ ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+ ip -6 route del vrf v$ol1 2001:db8:2::/64
+ ip route del vrf v$ol1 203.0.113.0/24
+
+ ip route del vrf v$ol1 2001:db8:3::2/128
+ __simple_if_fini g1 2001:db8:3::1/128
+ tunnel_destroy g1
+
+ __simple_if_fini $ul1 2001:db8:10::1/64
+ simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+ simple_if_init $ul21 2001:db8:10::2/64
+ __simple_if_init $ul22 v$ul21
+ vlan_create $ul22 111 v$ul21 2001:db8:11::1/64
+ vlan_create $ul22 222 v$ul21 2001:db8:12::1/64
+
+ ip -6 route add vrf v$ul21 2001:db8:3::1/128 via 2001:db8:10::1
+ ip -6 route add vrf v$ul21 2001:db8:3::2/128 \
+ nexthop via 2001:db8:11::2 \
+ nexthop via 2001:db8:12::2
+}
+
+sw2_destroy()
+{
+ ip -6 route del vrf v$ul21 2001:db8:3::2/128
+ ip -6 route del vrf v$ul21 2001:db8:3::1/128
+
+ vlan_destroy $ul22 222
+ vlan_destroy $ul22 111
+ __simple_if_fini $ul22
+ simple_if_fini $ul21 2001:db8:10::2/64
+}
+
+sw3_create()
+{
+ simple_if_init $ul31 2001:db8:13::1/64
+ __simple_if_init $ul32 v$ul31
+ vlan_create $ul32 111 v$ul31 2001:db8:11::2/64
+ vlan_create $ul32 222 v$ul31 2001:db8:12::2/64
+
+ ip -6 route add vrf v$ul31 2001:db8:3::2/128 via 2001:db8:13::2
+ ip -6 route add vrf v$ul31 2001:db8:3::1/128 \
+ nexthop via 2001:db8:11::1 \
+ nexthop via 2001:db8:12::1
+
+ tc qdisc add dev $ul32 clsact
+ tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+ flower vlan_id 111 action pass
+ tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+ flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+ tc qdisc del dev $ul32 clsact
+
+ ip -6 route del vrf v$ul31 2001:db8:3::1/128
+ ip -6 route del vrf v$ul31 2001:db8:3::2/128
+
+ vlan_destroy $ul32 222
+ vlan_destroy $ul32 111
+ __simple_if_fini $ul32
+ simple_if_fini $ul31 2001:db8:13::1/64
+}
+
+sw4_create()
+{
+ simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64
+ __simple_if_init $ul4 v$ol4 2001:db8:13::2/64
+
+ tunnel_create g2 ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \
+ dev v$ol4
+ __simple_if_init g2 v$ol4 2001:db8:3::2/128
+ ip -6 route add vrf v$ol4 2001:db8:3::1/128 via 2001:db8:13::1
+
+ ip route add vrf v$ol4 198.51.100.0/24 dev g2
+ ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+ ip -6 route del vrf v$ol4 2001:db8:1::/64
+ ip route del vrf v$ol4 198.51.100.0/24
+
+ ip -6 route del vrf v$ol4 2001:db8:3::1/128
+ __simple_if_fini g2 2001:db8:3::2/128
+ tunnel_destroy g2
+
+ __simple_if_fini $ul4 2001:db8:13::2/64
+ simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64
+ ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+ ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2
+}
+
+h2_destroy()
+{
+ ip -6 route del vrf v$h2 default
+ ip route del vrf v$h2 default
+ simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+
+ ol1=${NETIFS[p2]}
+ ul1=${NETIFS[p3]}
+
+ ul21=${NETIFS[p4]}
+ ul22=${NETIFS[p5]}
+
+ ul32=${NETIFS[p6]}
+ ul31=${NETIFS[p7]}
+
+ ul4=${NETIFS[p8]}
+ ol4=${NETIFS[p9]}
+
+ h2=${NETIFS[p10]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ sw3_create
+ sw4_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw4_destroy
+ sw3_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+send_src_ipv4()
+{
+ $MZ $h1 -q -p 64 -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+ $MZ $h1 -q -p 64 -A 198.51.100.2 -B 203.0.113.2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
+ -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec v$h1 \
+ $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
+ done
+}
+
+send_src_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+ $MZ -6 $h1 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+ local field="$1"; shift
+ local balanced="$1"; shift
+ local send_flows="$@"
+
+ RET=0
+
+ local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ $send_flows
+
+ local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+
+ local diff=$((d222 - d111))
+ local sum=$((d111 + d222))
+
+ local pct=$(echo "$diff / $sum * 100" | bc -l)
+ local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+ [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+ ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+ check_err $? "Expected traffic to be $balanced, but it is not"
+
+ log_test "Multipath hash field: $field ($balanced)"
+ log_info "Packets sent on path1 / path2: $d111 / $d222"
+}
+
+custom_hash_v4()
+{
+ log_info "Running IPv4 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv4
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp4
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp4
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp4
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp4
+
+ sysctl_restore net.ipv4.neigh.default.gc_thresh3
+ sysctl_restore net.ipv4.neigh.default.gc_thresh2
+ sysctl_restore net.ipv4.neigh.default.gc_thresh1
+}
+
+custom_hash_v6()
+{
+ log_info "Running IPv6 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv6
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0200
+ custom_hash_test "Inner flowlabel" "balanced" send_flowlabel
+ custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp6
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp6
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp6
+
+ sysctl_restore net.ipv6.neigh.default.gc_thresh3
+ sysctl_restore net.ipv6.neigh.default.gc_thresh2
+ sysctl_restore net.ipv6.neigh.default.gc_thresh1
+}
+
+custom_hash()
+{
+ # Test that when the hash policy is set to custom, traffic is
+ # distributed only according to the fields set in the
+ # fib_multipath_hash_fields sysctl.
+ #
+ # Each time set a different field and make sure traffic is only
+ # distributed when the field is changed in the packet stream.
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 3
+
+ custom_hash_v4
+ custom_hash_v6
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 42e28c983d41..92087d423bcf 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -4,6 +4,9 @@
##############################################################################
# Defines
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
# Can be overridden by the configuration file.
PING=${PING:=ping}
PING6=${PING6:=ping6}
@@ -38,7 +41,7 @@ check_tc_version()
tc -j &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing JSON support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -51,7 +54,7 @@ check_tc_mpls_support()
matchall action pipe &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing MPLS support"
- return 1
+ return $ksft_skip
fi
tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \
matchall
@@ -69,7 +72,7 @@ check_tc_mpls_lse_stats()
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
- return 1
+ return $ksft_skip
fi
tc -j filter show dev $dev ingress protocol mpls_uc | jq . &> /dev/null
@@ -79,7 +82,7 @@ check_tc_mpls_lse_stats()
if [[ $ret -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
- return 1
+ return $ksft_skip
fi
}
@@ -88,7 +91,7 @@ check_tc_shblock_support()
tc filter help 2>&1 | grep block &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing shared block support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -97,7 +100,7 @@ check_tc_chain_support()
tc help 2>&1|grep chain &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing chain support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -106,7 +109,7 @@ check_tc_action_hw_stats_support()
tc actions help 2>&1 | grep -q hw_stats
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -115,13 +118,13 @@ check_ethtool_lanes_support()
ethtool --help 2>&1| grep lanes &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: ethtool too old; it is missing lanes support"
- exit 1
+ exit $ksft_skip
fi
}
if [[ "$(id -u)" -ne 0 ]]; then
echo "SKIP: need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [[ "$CHECK_TC" = "yes" ]]; then
@@ -134,7 +137,7 @@ require_command()
if [[ ! -x "$(command -v "$cmd")" ]]; then
echo "SKIP: $cmd not installed"
- exit 1
+ exit $ksft_skip
fi
}
@@ -143,7 +146,7 @@ require_command $MZ
if [[ ! -v NUM_NETIFS ]]; then
echo "SKIP: importer does not define \"NUM_NETIFS\""
- exit 1
+ exit $ksft_skip
fi
##############################################################################
@@ -203,7 +206,7 @@ for ((i = 1; i <= NUM_NETIFS; ++i)); do
ip link show dev ${NETIFS[p$i]} &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: could not find all required interfaces"
- exit 1
+ exit $ksft_skip
fi
done
@@ -748,6 +751,14 @@ qdisc_parent_stats_get()
| jq '.[] | select(.parent == "'"$parent"'") | '"$selector"
}
+ipv6_stats_get()
+{
+ local dev=$1; shift
+ local stat=$1; shift
+
+ cat /proc/net/dev_snmp6/$dev | grep "^$stat" | cut -f2
+}
+
humanize()
{
local speed=$1; shift
diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
index 55eeacf59241..64fbd211d907 100755
--- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -75,7 +75,9 @@ switch_destroy()
tc qdisc del dev $swp2 clsact
tc qdisc del dev $swp1 clsact
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
index 5f20d289ee43..10e594c55117 100755
--- a/tools/testing/selftests/net/forwarding/pedit_l4port.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
@@ -71,7 +71,9 @@ switch_destroy()
tc qdisc del dev $swp2 clsact
tc qdisc del dev $swp1 clsact
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index 76efb1f8375e..a0d612e04990 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -411,7 +411,7 @@ ping_ipv6()
ip nexthop ls >/dev/null 2>&1
if [ $? -ne 0 ]; then
echo "Nexthop objects not supported; skipping tests"
- exit 0
+ exit $ksft_skip
fi
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index 4898dd4118f1..cb08ffe2356a 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -386,7 +386,7 @@ ping_ipv6()
ip nexthop ls >/dev/null 2>&1
if [ $? -ne 0 ]; then
echo "Nexthop objects not supported; skipping tests"
- exit 0
+ exit $ksft_skip
fi
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
index e3bd8a6bb8b4..bde11dc27873 100755
--- a/tools/testing/selftests/net/forwarding/skbedit_priority.sh
+++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
@@ -72,7 +72,9 @@ switch_destroy()
tc qdisc del dev $swp2 clsact
tc qdisc del dev $swp1 clsact
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh
new file mode 100755
index 000000000000..facbb0c80443
--- /dev/null
+++ b/tools/testing/selftests/net/gre_gso.sh
@@ -0,0 +1,236 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking GRE GSO.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+TESTS="gre_gso"
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+IP="ip -netns ns1"
+NS_EXEC="ip netns exec ns1"
+TMPFILE=`mktemp`
+PID=
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+}
+
+setup()
+{
+ set -e
+ ip netns add ns1
+ ip netns set ns1 auto
+ $IP link set dev lo up
+
+ ip link add veth0 type veth peer name veth1
+ ip link set veth0 up
+ ip link set veth1 netns ns1
+ $IP link set veth1 name veth0
+ $IP link set veth0 up
+
+ dd if=/dev/urandom of=$TMPFILE bs=1024 count=2048 &>/dev/null
+ set +e
+}
+
+cleanup()
+{
+ rm -rf $TMPFILE
+ [ -n "$PID" ] && kill $PID
+ ip link del dev gre1 &> /dev/null
+ ip link del dev veth0 &> /dev/null
+ ip netns del ns1
+}
+
+get_linklocal()
+{
+ local dev=$1
+ local ns=$2
+ local addr
+
+ [ -n "$ns" ] && ns="-netns $ns"
+
+ addr=$(ip -6 -br $ns addr show dev ${dev} | \
+ awk '{
+ for (i = 3; i <= NF; ++i) {
+ if ($i ~ /^fe80/)
+ print $i
+ }
+ }'
+ )
+ addr=${addr/\/*}
+
+ [ -z "$addr" ] && return 1
+
+ echo $addr
+
+ return 0
+}
+
+gre_create_tun()
+{
+ local a1=$1
+ local a2=$2
+ local mode
+
+ [[ $a1 =~ ^[0-9.]*$ ]] && mode=gre || mode=ip6gre
+
+ ip tunnel add gre1 mode $mode local $a1 remote $a2 dev veth0
+ ip link set gre1 up
+ $IP tunnel add gre1 mode $mode local $a2 remote $a1 dev veth0
+ $IP link set gre1 up
+}
+
+gre_gst_test_checks()
+{
+ local name=$1
+ local addr=$2
+
+ $NS_EXEC nc -kl $port >/dev/null &
+ PID=$!
+ while ! $NS_EXEC ss -ltn | grep -q $port; do ((i++)); sleep 0.01; done
+
+ cat $TMPFILE | timeout 1 nc $addr $port
+ log_test $? 0 "$name - copy file w/ TSO"
+
+ ethtool -K veth0 tso off
+
+ cat $TMPFILE | timeout 1 nc $addr $port
+ log_test $? 0 "$name - copy file w/ GSO"
+
+ ethtool -K veth0 tso on
+
+ kill $PID
+ PID=
+}
+
+gre6_gso_test()
+{
+ local port=7777
+
+ setup
+
+ a1=$(get_linklocal veth0)
+ a2=$(get_linklocal veth0 ns1)
+
+ gre_create_tun $a1 $a2
+
+ ip addr add 172.16.2.1/24 dev gre1
+ $IP addr add 172.16.2.2/24 dev gre1
+
+ ip -6 addr add 2001:db8:1::1/64 dev gre1 nodad
+ $IP -6 addr add 2001:db8:1::2/64 dev gre1 nodad
+
+ sleep 2
+
+ gre_gst_test_checks GREv6/v4 172.16.2.2
+ gre_gst_test_checks GREv6/v6 2001:db8:1::2
+
+ cleanup
+}
+
+gre_gso_test()
+{
+ gre6_gso_test
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -P Pause after each test before cleanup
+ -v verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :t:pPhv o
+do
+ case $o in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+PEER_CMD="ip netns exec ${PEER_NS}"
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v nc)" ]; then
+ echo "SKIP: Could not run test without nc tool"
+ exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+ case $t in
+ gre_gso) gre_gso_test;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
new file mode 100644
index 000000000000..cf37ce86b0fd
--- /dev/null
+++ b/tools/testing/selftests/net/gro.c
@@ -0,0 +1,1095 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This testsuite provides conformance testing for GRO coalescing.
+ *
+ * Test cases:
+ * 1.data
+ * Data packets of the same size and same header setup with correct
+ * sequence numbers coalesce. The one exception being the last data
+ * packet coalesced: it can be smaller than the rest and coalesced
+ * as long as it is in the same flow.
+ * 2.ack
+ * Pure ACK does not coalesce.
+ * 3.flags
+ * Specific test cases: no packets with PSH, SYN, URG, RST set will
+ * be coalesced.
+ * 4.tcp
+ * Packets with incorrect checksum, non-consecutive seqno and
+ * different TCP header options shouldn't coalesce. Nit: given that
+ * some extension headers have paddings, such as timestamp, headers
+ * that are padding differently would not be coalesced.
+ * 5.ip:
+ * Packets with different (ECN, TTL, TOS) header, ip options or
+ * ip fragments (ipv6) shouldn't coalesce.
+ * 6.large:
+ * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce.
+ *
+ * MSS is defined as 4096 - header because if it is too small
+ * (i.e. 1500 MTU - header), it will result in many packets,
+ * increasing the "large" test case's flakiness. This is because
+ * due to time sensitivity in the coalescing window, the receiver
+ * may not coalesce all of the packets.
+ *
+ * Note the timing issue applies to all of the test cases, so some
+ * flakiness is to be expected.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+
+#define DPORT 8000
+#define SPORT 1500
+#define PAYLOAD_LEN 100
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define NUM_PACKETS 4
+#define START_SEQ 100
+#define START_ACK 100
+#define SIP6 "fdaa::2"
+#define DIP6 "fdaa::1"
+#define SIP4 "192.168.1.200"
+#define DIP4 "192.168.1.100"
+#define ETH_P_NONE 0
+#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
+#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+
+static int proto = -1;
+static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
+static char *testname = "data";
+static char *ifname = "eth0";
+static char *smac = "aa:00:00:00:00:02";
+static char *dmac = "aa:00:00:00:00:01";
+static bool verbose;
+static bool tx_socket = true;
+static int tcp_offset = -1;
+static int total_hdr_len = -1;
+static int ethhdr_proto = -1;
+
+static void vlog(const char *fmt, ...)
+{
+ va_list args;
+
+ if (verbose) {
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+ }
+}
+
+static void setup_sock_filter(int fd)
+{
+ const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
+ const int ethproto_off = offsetof(struct ethhdr, h_proto);
+ int optlen = 0;
+ int ipproto_off;
+ int next_off;
+
+ if (proto == PF_INET)
+ next_off = offsetof(struct iphdr, protocol);
+ else
+ next_off = offsetof(struct ipv6hdr, nexthdr);
+ ipproto_off = ETH_HLEN + next_off;
+
+ if (strcmp(testname, "ip") == 0) {
+ if (proto == PF_INET)
+ optlen = sizeof(struct ip_timestamp);
+ else
+ optlen = sizeof(struct ip6_frag);
+ }
+
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+
+ struct sock_fprog bpf = {
+ .len = ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
+ error(1, errno, "error setting filter");
+}
+
+static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
+{
+ uint16_t *words = data;
+ int i;
+
+ for (i = 0; i < len / 2; i++)
+ sum += words[i];
+ if (len & 1)
+ sum += ((char *)data)[len - 1];
+ return sum;
+}
+
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+ sum = checksum_nofold(data, len, sum);
+ while (sum > 0xFFFF)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+ return ~sum;
+}
+
+static uint16_t tcp_checksum(void *buf, int payload_len)
+{
+ struct pseudo_header6 {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ uint16_t protocol;
+ uint16_t payload_len;
+ } ph6;
+ struct pseudo_header4 {
+ struct in_addr saddr;
+ struct in_addr daddr;
+ uint16_t protocol;
+ uint16_t payload_len;
+ } ph4;
+ uint32_t sum = 0;
+
+ if (proto == PF_INET6) {
+ if (inet_pton(AF_INET6, SIP6, &ph6.saddr) != 1)
+ error(1, errno, "inet_pton6 source ip pseudo");
+ if (inet_pton(AF_INET6, DIP6, &ph6.daddr) != 1)
+ error(1, errno, "inet_pton6 dest ip pseudo");
+ ph6.protocol = htons(IPPROTO_TCP);
+ ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+ sum = checksum_nofold(&ph6, sizeof(ph6), 0);
+ } else if (proto == PF_INET) {
+ if (inet_pton(AF_INET, SIP4, &ph4.saddr) != 1)
+ error(1, errno, "inet_pton source ip pseudo");
+ if (inet_pton(AF_INET, DIP4, &ph4.daddr) != 1)
+ error(1, errno, "inet_pton dest ip pseudo");
+ ph4.protocol = htons(IPPROTO_TCP);
+ ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+ sum = checksum_nofold(&ph4, sizeof(ph4), 0);
+ }
+
+ return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
+}
+
+static void read_MAC(uint8_t *mac_addr, char *mac)
+{
+ if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+ &mac_addr[0], &mac_addr[1], &mac_addr[2],
+ &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6)
+ error(1, 0, "sscanf");
+}
+
+static void fill_datalinklayer(void *buf)
+{
+ struct ethhdr *eth = buf;
+
+ memcpy(eth->h_dest, dst_mac, ETH_ALEN);
+ memcpy(eth->h_source, src_mac, ETH_ALEN);
+ eth->h_proto = ethhdr_proto;
+}
+
+static void fill_networklayer(void *buf, int payload_len)
+{
+ struct ipv6hdr *ip6h = buf;
+ struct iphdr *iph = buf;
+
+ if (proto == PF_INET6) {
+ memset(ip6h, 0, sizeof(*ip6h));
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
+ ip6h->nexthdr = IPPROTO_TCP;
+ ip6h->hop_limit = 8;
+ if (inet_pton(AF_INET6, SIP6, &ip6h->saddr) != 1)
+ error(1, errno, "inet_pton source ip6");
+ if (inet_pton(AF_INET6, DIP6, &ip6h->daddr) != 1)
+ error(1, errno, "inet_pton dest ip6");
+ } else if (proto == PF_INET) {
+ memset(iph, 0, sizeof(*iph));
+
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->ttl = 8;
+ iph->protocol = IPPROTO_TCP;
+ iph->tot_len = htons(sizeof(struct tcphdr) +
+ payload_len + sizeof(struct iphdr));
+ iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
+ if (inet_pton(AF_INET, SIP4, &iph->saddr) != 1)
+ error(1, errno, "inet_pton source ip");
+ if (inet_pton(AF_INET, DIP4, &iph->daddr) != 1)
+ error(1, errno, "inet_pton dest ip");
+ iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
+ }
+}
+
+static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
+ int payload_len, int fin)
+{
+ struct tcphdr *tcph = buf;
+
+ memset(tcph, 0, sizeof(*tcph));
+
+ tcph->source = htons(SPORT);
+ tcph->dest = htons(DPORT);
+ tcph->seq = ntohl(START_SEQ + seq_offset);
+ tcph->ack_seq = ntohl(START_ACK + ack_offset);
+ tcph->ack = 1;
+ tcph->fin = fin;
+ tcph->doff = 5;
+ tcph->window = htons(TCP_MAXWIN);
+ tcph->urg_ptr = 0;
+ tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
+{
+ int ret = -1;
+
+ ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
+ if (ret == -1)
+ error(1, errno, "sendto failure");
+ if (ret != len)
+ error(1, errno, "sendto wrong length");
+}
+
+static void create_packet(void *buf, int seq_offset, int ack_offset,
+ int payload_len, int fin)
+{
+ memset(buf, 0, total_hdr_len);
+ memset(buf + total_hdr_len, 'a', payload_len);
+ fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
+ payload_len, fin);
+ fill_networklayer(buf + ETH_HLEN, payload_len);
+ fill_datalinklayer(buf);
+}
+
+/* send one extra flag, not first and not last pkt */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+ int rst, int urg)
+{
+ static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ int payload_len, pkt_size, flag, i;
+ struct tcphdr *tcph;
+
+ payload_len = PAYLOAD_LEN * psh;
+ pkt_size = total_hdr_len + payload_len;
+ flag = NUM_PACKETS / 2;
+
+ create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
+
+ tcph = (struct tcphdr *)(flag_buf + tcp_offset);
+ tcph->psh = psh;
+ tcph->syn = syn;
+ tcph->rst = rst;
+ tcph->urg = urg;
+ tcph->check = 0;
+ tcph->check = tcp_checksum(tcph, payload_len);
+
+ for (i = 0; i < NUM_PACKETS + 1; i++) {
+ if (i == flag) {
+ write_packet(fd, flag_buf, pkt_size, daddr);
+ continue;
+ }
+ create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+ }
+}
+
+/* Test for data of same length, smaller than previous
+ * and of different lengths
+ */
+static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
+ int payload_len1, int payload_len2)
+{
+ static char buf[ETH_HLEN + IP_MAXPACKET];
+
+ create_packet(buf, 0, 0, payload_len1, 0);
+ write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
+ create_packet(buf, payload_len1, 0, payload_len2, 0);
+ write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
+}
+
+/* If incoming segments make tracked segment length exceed
+ * legal IP datagram length, do not coalesce
+ */
+static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
+{
+ static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
+ static char last[TOTAL_HDR_LEN + MSS];
+ static char new_seg[TOTAL_HDR_LEN + MSS];
+ int i;
+
+ for (i = 0; i < NUM_LARGE_PKT; i++)
+ create_packet(pkts[i], i * MSS, 0, MSS, 0);
+ create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
+ create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
+
+ for (i = 0; i < NUM_LARGE_PKT; i++)
+ write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
+ write_packet(fd, last, total_hdr_len + remainder, daddr);
+ write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
+}
+
+/* Pure acks and dup acks don't coalesce */
+static void send_ack(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN];
+
+ create_packet(buf, 0, 0, 0, 0);
+ write_packet(fd, buf, total_hdr_len, daddr);
+ write_packet(fd, buf, total_hdr_len, daddr);
+ create_packet(buf, 0, 1, 0, 0);
+ write_packet(fd, buf, total_hdr_len, daddr);
+}
+
+static void recompute_packet(char *buf, char *no_ext, int extlen)
+{
+ struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ memmove(buf, no_ext, total_hdr_len);
+ memmove(buf + total_hdr_len + extlen,
+ no_ext + total_hdr_len, PAYLOAD_LEN);
+
+ tcphdr->doff = tcphdr->doff + (extlen / 4);
+ tcphdr->check = 0;
+ tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
+ if (proto == PF_INET) {
+ iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else {
+ ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+ }
+}
+
+static void tcp_write_options(char *buf, int kind, int ts)
+{
+ struct tcp_option_ts {
+ uint8_t kind;
+ uint8_t len;
+ uint32_t tsval;
+ uint32_t tsecr;
+ } *opt_ts = (void *)buf;
+ struct tcp_option_window {
+ uint8_t kind;
+ uint8_t len;
+ uint8_t shift;
+ } *opt_window = (void *)buf;
+
+ switch (kind) {
+ case TCPOPT_NOP:
+ buf[0] = TCPOPT_NOP;
+ break;
+ case TCPOPT_WINDOW:
+ memset(opt_window, 0, sizeof(struct tcp_option_window));
+ opt_window->kind = TCPOPT_WINDOW;
+ opt_window->len = TCPOLEN_WINDOW;
+ opt_window->shift = 0;
+ break;
+ case TCPOPT_TIMESTAMP:
+ memset(opt_ts, 0, sizeof(struct tcp_option_ts));
+ opt_ts->kind = TCPOPT_TIMESTAMP;
+ opt_ts->len = TCPOLEN_TIMESTAMP;
+ opt_ts->tsval = ts;
+ opt_ts->tsecr = 0;
+ break;
+ default:
+ error(1, 0, "unimplemented TCP option");
+ break;
+ }
+}
+
+/* TCP with options is always a permutation of {TS, NOP, NOP}.
+ * Implement different orders to verify coalescing stops.
+ */
+static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
+{
+ switch (order) {
+ case 0:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
+ TCPOPT_TIMESTAMP, ts);
+ break;
+ case 1:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 1,
+ TCPOPT_TIMESTAMP, ts);
+ tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
+ TCPOPT_NOP, 0);
+ break;
+ case 2:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
+ tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
+ TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
+ TCPOPT_NOP, 0);
+ break;
+ default:
+ error(1, 0, "unknown order");
+ break;
+ }
+ recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
+}
+
+/* Packets with invalid checksum don't coalesce. */
+static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ tcph->check = tcph->check - 1;
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packets with non-consecutive sequence number don't coalesce.*/
+static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ tcph->seq = ntohl(htonl(tcph->seq) + 1);
+ tcph->check = 0;
+ tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packet with different timestamp option or different timestamps
+ * don't coalesce.
+ */
+static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+ int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 0, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 0, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 1);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 2);
+ write_packet(fd, extpkt, pkt_size, daddr);
+}
+
+/* Packet with different tcp options don't coalesce. */
+static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+ static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
+ int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+ int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt1, buf, 0, 0);
+ write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt1, buf, 0, 0);
+ write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
+ tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
+ recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
+ write_packet(fd, extpkt2, extpkt2_size, daddr);
+}
+
+static void add_ipv4_ts_option(void *buf, void *optpkt)
+{
+ struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
+ int optlen = sizeof(struct ip_timestamp);
+ struct iphdr *iph;
+
+ if (optlen % 4)
+ error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
+
+ ts->ipt_code = IPOPT_TS;
+ ts->ipt_len = optlen;
+ ts->ipt_ptr = 5;
+ ts->ipt_flg = IPOPT_TS_TSONLY;
+
+ memcpy(optpkt, buf, tcp_offset);
+ memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+ iph = (struct iphdr *)(optpkt + ETH_HLEN);
+ iph->ihl = 5 + (optlen / 4);
+ iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
+}
+
+/* IPv4 options shouldn't coalesce */
+static void send_ip_options(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
+ int optlen = sizeof(struct ip_timestamp);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+ add_ipv4_ts_option(buf, optpkt);
+ write_packet(fd, optpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+}
+
+/* IPv4 fragments shouldn't coalesce */
+static void send_fragment4(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[IP_MAXPACKET];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ /* Once fragmented, packet would retain the total_len.
+ * Tcp header is prepared as if rest of data is in follow-up frags,
+ * but follow up frags aren't actually sent.
+ */
+ memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
+ fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
+ fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN);
+ fill_datalinklayer(buf);
+
+ iph->frag_off = htons(0x6000); // DF = 1, MF = 1
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv4 packets with different ttl don't coalesce.*/
+static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ iph->ttl = 7;
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different tos don't coalesce.*/
+static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ if (proto == PF_INET) {
+ iph->tos = 1;
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else if (proto == PF_INET6) {
+ ip6h->priority = 0xf;
+ }
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different ECN don't coalesce.*/
+static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ if (proto == PF_INET) {
+ buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else {
+ buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10
+ }
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv6 fragments and packets with extensions don't coalesce.*/
+static void send_fragment6(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
+ sizeof(struct ip6_frag)];
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+ struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
+ int extlen = sizeof(struct ip6_frag);
+ int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
+ int extpkt_len = bufpkt_len + extlen;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, bufpkt_len, daddr);
+ }
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ memset(extpkt, 0, extpkt_len);
+
+ ip6h->nexthdr = IPPROTO_FRAGMENT;
+ ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+ frag->ip6f_nxt = IPPROTO_TCP;
+
+ memcpy(extpkt, buf, tcp_offset);
+ memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+ write_packet(fd, extpkt, extpkt_len, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, bufpkt_len, daddr);
+}
+
+static void bind_packetsocket(int fd)
+{
+ struct sockaddr_ll daddr = {};
+
+ daddr.sll_family = AF_PACKET;
+ daddr.sll_protocol = ethhdr_proto;
+ daddr.sll_ifindex = if_nametoindex(ifname);
+ if (daddr.sll_ifindex == 0)
+ error(1, errno, "if_nametoindex");
+
+ if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
+ error(1, errno, "could not bind socket");
+}
+
+static void set_timeout(int fd)
+{
+ struct timeval timeout;
+
+ timeout.tv_sec = 120;
+ timeout.tv_usec = 0;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
+ sizeof(timeout)) < 0)
+ error(1, errno, "cannot set timeout, setsockopt failed");
+}
+
+static void check_recv_pkts(int fd, int *correct_payload,
+ int correct_num_pkts)
+{
+ static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
+ struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
+ struct tcphdr *tcph;
+ bool bad_packet = false;
+ int tcp_ext_len = 0;
+ int ip_ext_len = 0;
+ int pkt_size = -1;
+ int data_len = 0;
+ int num_pkt = 0;
+ int i;
+
+ vlog("Expected {");
+ for (i = 0; i < correct_num_pkts; i++)
+ vlog("%d ", correct_payload[i]);
+ vlog("}, Total %d packets\nReceived {", correct_num_pkts);
+
+ while (1) {
+ pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
+ if (pkt_size < 0)
+ error(1, errno, "could not receive");
+
+ if (iph->version == 4)
+ ip_ext_len = (iph->ihl - 5) * 4;
+ else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
+ ip_ext_len = sizeof(struct ip6_frag);
+
+ tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
+
+ if (tcph->fin)
+ break;
+
+ tcp_ext_len = (tcph->doff - 5) * 4;
+ data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
+ /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3.
+ * Ipv4/tcp packets without at least 6 bytes of data will be padded.
+ * Packet sockets are protocol agnostic, and will not trim the padding.
+ */
+ if (pkt_size == ETH_ZLEN && iph->version == 4) {
+ data_len = ntohs(iph->tot_len)
+ - sizeof(struct tcphdr) - sizeof(struct iphdr);
+ }
+ vlog("%d ", data_len);
+ if (data_len != correct_payload[num_pkt]) {
+ vlog("[!=%d]", correct_payload[num_pkt]);
+ bad_packet = true;
+ }
+ num_pkt++;
+ }
+ vlog("}, Total %d packets.\n", num_pkt);
+ if (num_pkt != correct_num_pkts)
+ error(1, 0, "incorrect number of packets");
+ if (bad_packet)
+ error(1, 0, "incorrect packet geometry");
+
+ printf("Test succeeded\n\n");
+}
+
+static void gro_sender(void)
+{
+ static char fin_pkt[MAX_HDR_LEN];
+ struct sockaddr_ll daddr = {};
+ int txfd = -1;
+
+ txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (txfd < 0)
+ error(1, errno, "socket creation");
+
+ memset(&daddr, 0, sizeof(daddr));
+ daddr.sll_ifindex = if_nametoindex(ifname);
+ if (daddr.sll_ifindex == 0)
+ error(1, errno, "if_nametoindex");
+ daddr.sll_family = AF_PACKET;
+ memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
+ daddr.sll_halen = ETH_ALEN;
+ create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
+
+ if (strcmp(testname, "data") == 0) {
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ack") == 0) {
+ send_ack(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "flags") == 0) {
+ send_flags(txfd, &daddr, 1, 0, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 1, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 0, 1, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 0, 0, 1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "tcp") == 0) {
+ send_changed_checksum(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_seq(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_ts(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_diff_opt(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip") == 0) {
+ send_changed_ECN(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_tos(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ if (proto == PF_INET) {
+ /* Modified packets may be received out of order.
+ * Sleep function added to enforce test boundaries
+ * so that fin pkts are not received prior to other pkts.
+ */
+ sleep(1);
+ send_changed_ttl(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ send_ip_options(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ send_fragment4(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (proto == PF_INET6) {
+ send_fragment6(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ }
+ } else if (strcmp(testname, "large") == 0) {
+ /* 20 is the difference between min iphdr size
+ * and min ipv6hdr size. Like MAX_HDR_SIZE,
+ * MAX_PAYLOAD is defined with the larger header of the two.
+ */
+ int offset = proto == PF_INET ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+ send_large(txfd, &daddr, remainder);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_large(txfd, &daddr, remainder + 1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else {
+ error(1, 0, "Unknown testcase");
+ }
+
+ if (close(txfd))
+ error(1, errno, "socket close");
+}
+
+static void gro_receiver(void)
+{
+ static int correct_payload[NUM_PACKETS];
+ int rxfd = -1;
+
+ rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
+ if (rxfd < 0)
+ error(1, 0, "socket creation");
+ setup_sock_filter(rxfd);
+ set_timeout(rxfd);
+ bind_packetsocket(rxfd);
+
+ memset(correct_payload, 0, sizeof(correct_payload));
+
+ if (strcmp(testname, "data") == 0) {
+ printf("pure data packet of same size: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("large data packets followed by a smaller one: ");
+ correct_payload[0] = PAYLOAD_LEN * 1.5;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("small data packets followed by a larger one: ");
+ correct_payload[0] = PAYLOAD_LEN / 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ack") == 0) {
+ printf("duplicate ack and pure ack: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "flags") == 0) {
+ correct_payload[0] = PAYLOAD_LEN * 3;
+ correct_payload[1] = PAYLOAD_LEN * 2;
+
+ printf("psh flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = 0;
+ correct_payload[2] = PAYLOAD_LEN * 2;
+ printf("syn flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("rst flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("urg flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "tcp") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ correct_payload[3] = PAYLOAD_LEN;
+
+ printf("changed checksum does not coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("Wrong Seq number doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("Different timestamp doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 4);
+
+ printf("Different options doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ip") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+
+ printf("different ECN doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("different tos doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ if (proto == PF_INET) {
+ printf("different ttl doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("ip options doesn't coalesce: ");
+ correct_payload[2] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("fragmented ip4 doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (proto == PF_INET6) {
+ /* GRO doesn't check for ipv6 hop limit when flushing.
+ * Hence no corresponding test to the ipv4 case.
+ */
+ printf("fragmented ip6 doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ }
+ } else if (strcmp(testname, "large") == 0) {
+ int offset = proto == PF_INET ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+ correct_payload[0] = (MAX_PAYLOAD + offset);
+ correct_payload[1] = remainder;
+ printf("Shouldn't coalesce if exceed IP max pkt size: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* last segment sent individually, doesn't start new segment */
+ correct_payload[0] = correct_payload[0] - remainder;
+ correct_payload[1] = remainder + 1;
+ correct_payload[2] = remainder + 1;
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else {
+ error(1, 0, "Test case error, should never trigger");
+ }
+
+ if (close(rxfd))
+ error(1, 0, "socket close");
+}
+
+static void parse_args(int argc, char **argv)
+{
+ static const struct option opts[] = {
+ { "dmac", required_argument, NULL, 'D' },
+ { "iface", required_argument, NULL, 'i' },
+ { "ipv4", no_argument, NULL, '4' },
+ { "ipv6", no_argument, NULL, '6' },
+ { "rx", no_argument, NULL, 'r' },
+ { "smac", required_argument, NULL, 'S' },
+ { "test", required_argument, NULL, 't' },
+ { "verbose", no_argument, NULL, 'v' },
+ { 0, 0, 0, 0 }
+ };
+ int c;
+
+ while ((c = getopt_long(argc, argv, "46D:i:rS:t:v", opts, NULL)) != -1) {
+ switch (c) {
+ case '4':
+ proto = PF_INET;
+ ethhdr_proto = htons(ETH_P_IP);
+ break;
+ case '6':
+ proto = PF_INET6;
+ ethhdr_proto = htons(ETH_P_IPV6);
+ break;
+ case 'D':
+ dmac = optarg;
+ break;
+ case 'i':
+ ifname = optarg;
+ break;
+ case 'r':
+ tx_socket = false;
+ break;
+ case 'S':
+ smac = optarg;
+ break;
+ case 't':
+ testname = optarg;
+ break;
+ case 'v':
+ verbose = true;
+ break;
+ default:
+ error(1, 0, "%s invalid option %c\n", __func__, c);
+ break;
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_args(argc, argv);
+
+ if (proto == PF_INET) {
+ tcp_offset = ETH_HLEN + sizeof(struct iphdr);
+ total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+ } else if (proto == PF_INET6) {
+ tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
+ total_hdr_len = MAX_HDR_LEN;
+ } else {
+ error(1, 0, "Protocol family is not ipv4 or ipv6");
+ }
+
+ read_MAC(src_mac, smac);
+ read_MAC(dst_mac, dmac);
+
+ if (tx_socket)
+ gro_sender();
+ else
+ gro_receiver();
+ return 0;
+}
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
new file mode 100755
index 000000000000..342ad27f631b
--- /dev/null
+++ b/tools/testing/selftests/net/gro.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
+readonly PROTOS=("ipv4" "ipv6")
+dev=""
+test="all"
+proto="ipv4"
+
+run_test() {
+ local server_pid=0
+ local exit_code=0
+ local protocol=$1
+ local test=$2
+ local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
+ "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
+
+ setup_ns
+ # Each test is run 3 times to deflake, because given the receive timing,
+ # not all packets that should coalesce will be considered in the same flow
+ # on every try.
+ for tries in {1..3}; do
+ # Actual test starts here
+ ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
+ 1>>log.txt &
+ server_pid=$!
+ sleep 0.5 # to allow for socket init
+ ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \
+ 1>>log.txt
+ wait "${server_pid}"
+ exit_code=$?
+ if [[ "${exit_code}" -eq 0 ]]; then
+ break;
+ fi
+ done
+ cleanup_ns
+ echo ${exit_code}
+}
+
+run_all_tests() {
+ local failed_tests=()
+ for proto in "${PROTOS[@]}"; do
+ for test in "${TESTS[@]}"; do
+ echo "running test ${proto} ${test}" >&2
+ exit_code=$(run_test $proto $test)
+ if [[ "${exit_code}" -ne 0 ]]; then
+ failed_tests+=("${proto}_${test}")
+ fi;
+ done;
+ done
+ if [[ ${#failed_tests[@]} -ne 0 ]]; then
+ echo "failed tests: ${failed_tests[*]}. \
+ Please see log.txt for more logs"
+ exit 1
+ else
+ echo "All Tests Succeeded!"
+ fi;
+}
+
+usage() {
+ echo "Usage: $0 \
+ [-i <DEV>] \
+ [-t data|ack|flags|tcp|ip|large] \
+ [-p <ipv4|ipv6>]" 1>&2;
+ exit 1;
+}
+
+while getopts "i:t:p:" opt; do
+ case "${opt}" in
+ i)
+ dev="${OPTARG}"
+ ;;
+ t)
+ test="${OPTARG}"
+ ;;
+ p)
+ proto="${OPTARG}"
+ ;;
+ *)
+ usage
+ ;;
+ esac
+done
+
+if [ -n "$dev" ]; then
+ source setup_loopback.sh
+else
+ source setup_veth.sh
+fi
+
+setup
+trap cleanup EXIT
+if [[ "${test}" == "all" ]]; then
+ run_all_tests
+else
+ run_test "${proto}" "${test}"
+fi;
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index bf361f30d6ef..ecbf57f264ed 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -63,10 +63,14 @@ log_test()
local rc=$1
local expected=$2
local msg="$3"
+ local xfail=$4
if [ ${rc} -eq ${expected} ]; then
printf "TEST: %-60s [ OK ]\n" "${msg}"
nsuccess=$((nsuccess+1))
+ elif [ ${rc} -eq ${xfail} ]; then
+ printf "TEST: %-60s [XFAIL]\n" "${msg}"
+ nxfail=$((nxfail+1))
else
ret=1
nfail=$((nfail+1))
@@ -309,9 +313,10 @@ check_exception()
fi
log_test $? 0 "IPv4: ${desc}"
- if [ "$with_redirect" = "yes" ]; then
+ # No PMTU info for test "redirect" and "mtu exception plus redirect"
+ if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then
ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
- grep -q "${H2_N2_IP6} from :: via ${R2_LLADDR} dev br0.*${mtu}"
+ grep -v "mtu" | grep -q "${H2_N2_IP6} .*via ${R2_LLADDR} dev br0"
elif [ -n "${mtu}" ]; then
ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
grep -q "${mtu}"
@@ -322,7 +327,7 @@ check_exception()
ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
grep -v "mtu" | grep -q "${R1_LLADDR}"
fi
- log_test $? 0 "IPv6: ${desc}"
+ log_test $? 0 "IPv6: ${desc}" 1
}
run_ping()
@@ -488,6 +493,7 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
ret=0
nsuccess=0
nfail=0
+nxfail=0
while getopts :pv o
do
@@ -532,5 +538,6 @@ fi
printf "\nTests passed: %3d\n" ${nsuccess}
printf "Tests failed: %3d\n" ${nfail}
+printf "Tests xfailed: %3d\n" ${nxfail}
exit $ret
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
new file mode 100755
index 000000000000..a2489ec398fe
--- /dev/null
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -0,0 +1,668 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Author: Justin Iurman <justin.iurman@uliege.be>
+#
+# This script evaluates the IOAM insertion for IPv6 by checking the IOAM data
+# consistency directly inside packets on the receiver side. Tests are divided
+# into three categories: OUTPUT (evaluates the IOAM processing by the sender),
+# INPUT (evaluates the IOAM processing by the receiver) and GLOBAL (evaluates
+# wider use cases that do not fall into the other two categories). Both OUTPUT
+# and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL
+# tests use the entire three-node topology (alpha, beta, gamma). Each test is
+# documented inside its own handler in the code below.
+#
+# An IOAM domain is configured from Alpha to Gamma but not on the reverse path.
+# When either Beta or Gamma is the destination (depending on the test category),
+# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | Alpha netns | | Gamma netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | db01::2/64 | | | | db02::2/64 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +----------------------------------------------------+
+# | . . |
+# | +-------------+ +-------------+ |
+# | | veth0 | | veth1 | |
+# | | db01::1/64 | ................ | db02::1/64 | |
+# | +-------------+ +-------------+ |
+# | |
+# | Beta netns |
+# | |
+# +----------------------------------------------------+
+#
+#
+#
+# =============================================================
+# | Alpha - IOAM configuration |
+# +===========================================================+
+# | Node ID | 1 |
+# +-----------------------------------------------------------+
+# | Node Wide ID | 11111111 |
+# +-----------------------------------------------------------+
+# | Ingress ID | 0xffff (default value) |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID | 0xffffffff (default value) |
+# +-----------------------------------------------------------+
+# | Egress ID | 101 |
+# +-----------------------------------------------------------+
+# | Egress Wide ID | 101101 |
+# +-----------------------------------------------------------+
+# | Namespace Data | 0xdeadbee0 |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf00dc0de |
+# +-----------------------------------------------------------+
+# | Schema ID | 777 |
+# +-----------------------------------------------------------+
+# | Schema Data | something that will be 4n-aligned |
+# +-----------------------------------------------------------+
+#
+#
+# =============================================================
+# | Beta - IOAM configuration |
+# +===========================================================+
+# | Node ID | 2 |
+# +-----------------------------------------------------------+
+# | Node Wide ID | 22222222 |
+# +-----------------------------------------------------------+
+# | Ingress ID | 201 |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID | 201201 |
+# +-----------------------------------------------------------+
+# | Egress ID | 202 |
+# +-----------------------------------------------------------+
+# | Egress Wide ID | 202202 |
+# +-----------------------------------------------------------+
+# | Namespace Data | 0xdeadbee1 |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf11dc0de |
+# +-----------------------------------------------------------+
+# | Schema ID | 666 |
+# +-----------------------------------------------------------+
+# | Schema Data | Hello there -Obi |
+# +-----------------------------------------------------------+
+#
+#
+# =============================================================
+# | Gamma - IOAM configuration |
+# +===========================================================+
+# | Node ID | 3 |
+# +-----------------------------------------------------------+
+# | Node Wide ID | 33333333 |
+# +-----------------------------------------------------------+
+# | Ingress ID | 301 |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID | 301301 |
+# +-----------------------------------------------------------+
+# | Egress ID | 0xffff (default value) |
+# +-----------------------------------------------------------+
+# | Egress Wide ID | 0xffffffff (default value) |
+# +-----------------------------------------------------------+
+# | Namespace Data | 0xdeadbee2 |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf22dc0de |
+# +-----------------------------------------------------------+
+# | Schema ID | 0xffffff (= None) |
+# +-----------------------------------------------------------+
+# | Schema Data | |
+# +-----------------------------------------------------------+
+
+
+################################################################################
+# #
+# WARNING: Be careful if you modify the block below - it MUST be kept #
+# synchronized with configurations inside ioam6_parser.c and always #
+# reflect the same. #
+# #
+################################################################################
+
+ALPHA=(
+ 1 # ID
+ 11111111 # Wide ID
+ 0xffff # Ingress ID
+ 0xffffffff # Ingress Wide ID
+ 101 # Egress ID
+ 101101 # Egress Wide ID
+ 0xdeadbee0 # Namespace Data
+ 0xcafec0caf00dc0de # Namespace Wide Data
+ 777 # Schema ID (0xffffff = None)
+ "something that will be 4n-aligned" # Schema Data
+)
+
+BETA=(
+ 2
+ 22222222
+ 201
+ 201201
+ 202
+ 202202
+ 0xdeadbee1
+ 0xcafec0caf11dc0de
+ 666
+ "Hello there -Obi"
+)
+
+GAMMA=(
+ 3
+ 33333333
+ 301
+ 301301
+ 0xffff
+ 0xffffffff
+ 0xdeadbee2
+ 0xcafec0caf22dc0de
+ 0xffffff
+ ""
+)
+
+TESTS_OUTPUT="
+ out_undef_ns
+ out_no_room
+ out_bits
+ out_full_supp_trace
+"
+
+TESTS_INPUT="
+ in_undef_ns
+ in_no_room
+ in_oflag
+ in_bits
+ in_full_supp_trace
+"
+
+TESTS_GLOBAL="
+ fwd_full_supp_trace
+"
+
+
+################################################################################
+# #
+# LIBRARY #
+# #
+################################################################################
+
+check_kernel_compatibility()
+{
+ ip netns add ioam-tmp-node
+ ip link add name veth0 netns ioam-tmp-node type veth \
+ peer name veth1 netns ioam-tmp-node
+
+ ip -netns ioam-tmp-node link set veth0 up
+ ip -netns ioam-tmp-node link set veth1 up
+
+ ip -netns ioam-tmp-node ioam namespace add 0 &>/dev/null
+ ns_ad=$?
+
+ ip -netns ioam-tmp-node ioam namespace show | grep -q "namespace 0"
+ ns_sh=$?
+
+ if [[ $ns_ad != 0 || $ns_sh != 0 ]]
+ then
+ echo "SKIP: kernel version probably too old, missing ioam support"
+ ip link del veth0 2>/dev/null || true
+ ip netns del ioam-tmp-node || true
+ exit 1
+ fi
+
+ ip -netns ioam-tmp-node route add db02::/64 encap ioam6 trace prealloc \
+ type 0x800000 ns 0 size 4 dev veth0 &>/dev/null
+ tr_ad=$?
+
+ ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6 trace"
+ tr_sh=$?
+
+ if [[ $tr_ad != 0 || $tr_sh != 0 ]]
+ then
+ echo "SKIP: cannot attach an ioam trace to a route, did you compile" \
+ "without CONFIG_IPV6_IOAM6_LWTUNNEL?"
+ ip link del veth0 2>/dev/null || true
+ ip netns del ioam-tmp-node || true
+ exit 1
+ fi
+
+ ip link del veth0 2>/dev/null || true
+ ip netns del ioam-tmp-node || true
+}
+
+cleanup()
+{
+ ip link del ioam-veth-alpha 2>/dev/null || true
+ ip link del ioam-veth-gamma 2>/dev/null || true
+
+ ip netns del ioam-node-alpha || true
+ ip netns del ioam-node-beta || true
+ ip netns del ioam-node-gamma || true
+}
+
+setup()
+{
+ ip netns add ioam-node-alpha
+ ip netns add ioam-node-beta
+ ip netns add ioam-node-gamma
+
+ ip link add name ioam-veth-alpha netns ioam-node-alpha type veth \
+ peer name ioam-veth-betaL netns ioam-node-beta
+ ip link add name ioam-veth-betaR netns ioam-node-beta type veth \
+ peer name ioam-veth-gamma netns ioam-node-gamma
+
+ ip -netns ioam-node-alpha link set ioam-veth-alpha name veth0
+ ip -netns ioam-node-beta link set ioam-veth-betaL name veth0
+ ip -netns ioam-node-beta link set ioam-veth-betaR name veth1
+ ip -netns ioam-node-gamma link set ioam-veth-gamma name veth0
+
+ ip -netns ioam-node-alpha addr add db01::2/64 dev veth0
+ ip -netns ioam-node-alpha link set veth0 up
+ ip -netns ioam-node-alpha link set lo up
+ ip -netns ioam-node-alpha route add db02::/64 via db01::1 dev veth0
+ ip -netns ioam-node-alpha route del db01::/64
+ ip -netns ioam-node-alpha route add db01::/64 dev veth0
+
+ ip -netns ioam-node-beta addr add db01::1/64 dev veth0
+ ip -netns ioam-node-beta addr add db02::1/64 dev veth1
+ ip -netns ioam-node-beta link set veth0 up
+ ip -netns ioam-node-beta link set veth1 up
+ ip -netns ioam-node-beta link set lo up
+
+ ip -netns ioam-node-gamma addr add db02::2/64 dev veth0
+ ip -netns ioam-node-gamma link set veth0 up
+ ip -netns ioam-node-gamma link set lo up
+ ip -netns ioam-node-gamma route add db01::/64 via db02::1 dev veth0
+
+ # - IOAM config -
+ ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]}
+ ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]}
+ ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]}
+ ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]}
+ ip -netns ioam-node-alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]}
+ ip -netns ioam-node-alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}"
+ ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]}
+
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.all.forwarding=1
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]}
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]}
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]}
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]}
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]}
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]}
+ ip -netns ioam-node-beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]}
+ ip -netns ioam-node-beta ioam schema add ${BETA[8]} "${BETA[9]}"
+ ip -netns ioam-node-beta ioam namespace set 123 schema ${BETA[8]}
+
+ ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]}
+ ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]}
+ ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]}
+ ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]}
+ ip -netns ioam-node-gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]}
+
+ sleep 1
+
+ ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null
+ if [ $? != 0 ]
+ then
+ echo "Setup FAILED"
+ cleanup &>/dev/null
+ exit 0
+ fi
+}
+
+log_test_passed()
+{
+ local desc=$1
+ printf "TEST: %-60s [ OK ]\n" "${desc}"
+}
+
+log_test_failed()
+{
+ local desc=$1
+ printf "TEST: %-60s [FAIL]\n" "${desc}"
+}
+
+run_test()
+{
+ local name=$1
+ local desc=$2
+ local node_src=$3
+ local node_dst=$4
+ local ip6_src=$5
+ local ip6_dst=$6
+ local if_dst=$7
+ local trace_type=$8
+ local ioam_ns=$9
+
+ ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \
+ $trace_type $ioam_ns &
+ local spid=$!
+ sleep 0.1
+
+ ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null
+ if [ $? != 0 ]
+ then
+ log_test_failed "${desc}"
+ kill -2 $spid &>/dev/null
+ else
+ wait $spid
+ [ $? = 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
+ fi
+}
+
+run()
+{
+ echo
+ echo "OUTPUT tests"
+ printf "%0.s-" {1..74}
+ echo
+
+ # set OUTPUT settings
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0
+
+ for t in $TESTS_OUTPUT
+ do
+ $t
+ done
+
+ # clean OUTPUT settings
+ ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ ip -netns ioam-node-alpha route change db01::/64 dev veth0
+
+
+ echo
+ echo "INPUT tests"
+ printf "%0.s-" {1..74}
+ echo
+
+ # set INPUT settings
+ ip -netns ioam-node-alpha ioam namespace del 123
+
+ for t in $TESTS_INPUT
+ do
+ $t
+ done
+
+ # clean INPUT settings
+ ip -netns ioam-node-alpha ioam namespace add 123 \
+ data ${ALPHA[6]} wide ${ALPHA[7]}
+ ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]}
+ ip -netns ioam-node-alpha route change db01::/64 dev veth0
+
+
+ echo
+ echo "GLOBAL tests"
+ printf "%0.s-" {1..74}
+ echo
+
+ for t in $TESTS_GLOBAL
+ do
+ $t
+ done
+}
+
+bit2type=(
+ 0x800000 0x400000 0x200000 0x100000 0x080000 0x040000 0x020000 0x010000
+ 0x008000 0x004000 0x002000 0x001000 0x000800 0x000400 0x000200 0x000100
+ 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002
+)
+bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 )
+
+
+################################################################################
+# #
+# OUTPUT tests #
+# #
+# Two nodes (sender/receiver), IOAM disabled on ingress for the receiver. #
+################################################################################
+
+out_undef_ns()
+{
+ ##############################################################################
+ # Make sure that the encap node won't fill the trace if the chosen IOAM #
+ # namespace is not configured locally. #
+ ##############################################################################
+ local desc="Unknown IOAM namespace"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0x800000 ns 0 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0x800000 0
+}
+
+out_no_room()
+{
+ ##############################################################################
+ # Make sure that the encap node won't fill the trace and will set the #
+ # Overflow flag since there is no room enough for its data. #
+ ##############################################################################
+ local desc="Missing trace room"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0xc00000 ns 123 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0xc00000 123
+}
+
+out_bits()
+{
+ ##############################################################################
+ # Make sure that, for each trace type bit, the encap node will either: #
+ # (i) fill the trace with its data when it is a supported bit #
+ # (ii) not fill the trace with its data when it is an unsupported bit #
+ ##############################################################################
+ local desc="Trace type with bit <n> only"
+
+ local tmp=${bit2size[22]}
+ bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) ))
+
+ for i in {0..22}
+ do
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
+ prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \
+ dev veth0 &>/dev/null
+
+ local cmd_res=$?
+ local descr="${desc/<n>/$i}"
+
+ if [[ $i -ge 12 && $i -le 21 ]]
+ then
+ if [ $cmd_res != 0 ]
+ then
+ npassed=$((npassed+1))
+ log_test_passed "$descr"
+ else
+ nfailed=$((nfailed+1))
+ log_test_failed "$descr"
+ fi
+ else
+ run_test "out_bit$i" "$descr" ioam-node-alpha ioam-node-beta \
+ db01::2 db01::1 veth0 ${bit2type[$i]} 123
+ fi
+ done
+
+ bit2size[22]=$tmp
+}
+
+out_full_supp_trace()
+{
+ ##############################################################################
+ # Make sure that the encap node will correctly fill a full trace. Be careful,#
+ # "full trace" here does NOT mean all bits (only supported ones). #
+ ##############################################################################
+ local desc="Full supported trace"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0xfff002 ns 123 size 100 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0xfff002 123
+}
+
+
+################################################################################
+# #
+# INPUT tests #
+# #
+# Two nodes (sender/receiver), the sender MUST NOT fill the trace upon #
+# insertion -> the IOAM namespace configured on the sender is removed #
+# and is used in the inserted trace to force the sender not to fill it. #
+################################################################################
+
+in_undef_ns()
+{
+ ##############################################################################
+ # Make sure that the receiving node won't fill the trace if the related IOAM #
+ # namespace is not configured locally. #
+ ##############################################################################
+ local desc="Unknown IOAM namespace"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0x800000 ns 0 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0x800000 0
+}
+
+in_no_room()
+{
+ ##############################################################################
+ # Make sure that the receiving node won't fill the trace and will set the #
+ # Overflow flag if there is no room enough for its data. #
+ ##############################################################################
+ local desc="Missing trace room"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0xc00000 ns 123 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0xc00000 123
+}
+
+in_bits()
+{
+ ##############################################################################
+ # Make sure that, for each trace type bit, the receiving node will either: #
+ # (i) fill the trace with its data when it is a supported bit #
+ # (ii) not fill the trace with its data when it is an unsupported bit #
+ ##############################################################################
+ local desc="Trace type with bit <n> only"
+
+ local tmp=${bit2size[22]}
+ bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) ))
+
+ for i in {0..11} {22..22}
+ do
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
+ prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0
+
+ run_test "in_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \
+ db01::2 db01::1 veth0 ${bit2type[$i]} 123
+ done
+
+ bit2size[22]=$tmp
+}
+
+in_oflag()
+{
+ ##############################################################################
+ # Make sure that the receiving node won't fill the trace since the Overflow #
+ # flag is set. #
+ ##############################################################################
+ local desc="Overflow flag is set"
+
+ # Exception:
+ # Here, we need the sender to set the Overflow flag. For that, we will add
+ # back the IOAM namespace that was previously configured on the sender.
+ ip -netns ioam-node-alpha ioam namespace add 123
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0xc00000 ns 123 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0xc00000 123
+
+ # And we clean the exception for this test to get things back to normal for
+ # other INPUT tests
+ ip -netns ioam-node-alpha ioam namespace del 123
+}
+
+in_full_supp_trace()
+{
+ ##############################################################################
+ # Make sure that the receiving node will correctly fill a full trace. Be #
+ # careful, "full trace" here does NOT mean all bits (only supported ones). #
+ ##############################################################################
+ local desc="Full supported trace"
+
+ ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
+ type 0xfff002 ns 123 size 80 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
+ db01::1 veth0 0xfff002 123
+}
+
+
+################################################################################
+# #
+# GLOBAL tests #
+# #
+# Three nodes (sender/router/receiver), IOAM fully enabled on every node. #
+################################################################################
+
+fwd_full_supp_trace()
+{
+ ##############################################################################
+ # Make sure that all three nodes correctly filled the full supported trace #
+ # by checking that the trace data is consistent with the predefined config. #
+ ##############################################################################
+ local desc="Forward - Full supported trace"
+
+ ip -netns ioam-node-alpha route change db02::/64 encap ioam6 trace prealloc \
+ type 0xfff002 ns 123 size 244 via db01::1 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-gamma db01::2 \
+ db02::2 veth0 0xfff002 123
+}
+
+
+################################################################################
+# #
+# MAIN #
+# #
+################################################################################
+
+if [ "$(id -u)" -ne 0 ]
+then
+ echo "SKIP: Need root privileges"
+ exit 1
+fi
+
+if [ ! -x "$(command -v ip)" ]
+then
+ echo "SKIP: Could not run test without ip tool"
+ exit 1
+fi
+
+ip ioam &>/dev/null
+if [ $? = 1 ]
+then
+ echo "SKIP: iproute2 too old, missing ioam command"
+ exit 1
+fi
+
+check_kernel_compatibility
+
+cleanup &>/dev/null
+setup
+run
+cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c
new file mode 100644
index 000000000000..8f6997d35816
--- /dev/null
+++ b/tools/testing/selftests/net/ioam6_parser.c
@@ -0,0 +1,676 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Author: Justin Iurman (justin.iurman@uliege.be)
+ *
+ * IOAM tester for IPv6, see ioam6.sh for details on each test case.
+ */
+#include <arpa/inet.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/const.h>
+#include <linux/if_ether.h>
+#include <linux/ioam6.h>
+#include <linux/ipv6.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+struct ioam_config {
+ __u32 id;
+ __u64 wide;
+ __u16 ingr_id;
+ __u16 egr_id;
+ __u32 ingr_wide;
+ __u32 egr_wide;
+ __u32 ns_data;
+ __u64 ns_wide;
+ __u32 sc_id;
+ __u8 hlim;
+ char *sc_data;
+};
+
+/*
+ * Be careful if you modify structs below - everything MUST be kept synchronized
+ * with configurations inside ioam6.sh and always reflect the same.
+ */
+
+static struct ioam_config node1 = {
+ .id = 1,
+ .wide = 11111111,
+ .ingr_id = 0xffff, /* default value */
+ .egr_id = 101,
+ .ingr_wide = 0xffffffff, /* default value */
+ .egr_wide = 101101,
+ .ns_data = 0xdeadbee0,
+ .ns_wide = 0xcafec0caf00dc0de,
+ .sc_id = 777,
+ .sc_data = "something that will be 4n-aligned",
+ .hlim = 64,
+};
+
+static struct ioam_config node2 = {
+ .id = 2,
+ .wide = 22222222,
+ .ingr_id = 201,
+ .egr_id = 202,
+ .ingr_wide = 201201,
+ .egr_wide = 202202,
+ .ns_data = 0xdeadbee1,
+ .ns_wide = 0xcafec0caf11dc0de,
+ .sc_id = 666,
+ .sc_data = "Hello there -Obi",
+ .hlim = 63,
+};
+
+static struct ioam_config node3 = {
+ .id = 3,
+ .wide = 33333333,
+ .ingr_id = 301,
+ .egr_id = 0xffff, /* default value */
+ .ingr_wide = 301301,
+ .egr_wide = 0xffffffff, /* default value */
+ .ns_data = 0xdeadbee2,
+ .ns_wide = 0xcafec0caf22dc0de,
+ .sc_id = 0xffffff, /* default value */
+ .sc_data = NULL,
+ .hlim = 62,
+};
+
+enum {
+ /**********
+ * OUTPUT *
+ **********/
+ TEST_OUT_UNDEF_NS,
+ TEST_OUT_NO_ROOM,
+ TEST_OUT_BIT0,
+ TEST_OUT_BIT1,
+ TEST_OUT_BIT2,
+ TEST_OUT_BIT3,
+ TEST_OUT_BIT4,
+ TEST_OUT_BIT5,
+ TEST_OUT_BIT6,
+ TEST_OUT_BIT7,
+ TEST_OUT_BIT8,
+ TEST_OUT_BIT9,
+ TEST_OUT_BIT10,
+ TEST_OUT_BIT11,
+ TEST_OUT_BIT22,
+ TEST_OUT_FULL_SUPP_TRACE,
+
+ /*********
+ * INPUT *
+ *********/
+ TEST_IN_UNDEF_NS,
+ TEST_IN_NO_ROOM,
+ TEST_IN_OFLAG,
+ TEST_IN_BIT0,
+ TEST_IN_BIT1,
+ TEST_IN_BIT2,
+ TEST_IN_BIT3,
+ TEST_IN_BIT4,
+ TEST_IN_BIT5,
+ TEST_IN_BIT6,
+ TEST_IN_BIT7,
+ TEST_IN_BIT8,
+ TEST_IN_BIT9,
+ TEST_IN_BIT10,
+ TEST_IN_BIT11,
+ TEST_IN_BIT22,
+ TEST_IN_FULL_SUPP_TRACE,
+
+ /**********
+ * GLOBAL *
+ **********/
+ TEST_FWD_FULL_SUPP_TRACE,
+
+ __TEST_MAX,
+};
+
+static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h,
+ __u32 trace_type, __u16 ioam_ns)
+{
+ if (__be16_to_cpu(ioam6h->namespace_id) != ioam_ns ||
+ __be32_to_cpu(ioam6h->type_be32) != (trace_type << 8))
+ return 1;
+
+ switch (tid) {
+ case TEST_OUT_UNDEF_NS:
+ case TEST_IN_UNDEF_NS:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 1 ||
+ ioam6h->remlen != 1;
+
+ case TEST_OUT_NO_ROOM:
+ case TEST_IN_NO_ROOM:
+ case TEST_IN_OFLAG:
+ return !ioam6h->overflow ||
+ ioam6h->nodelen != 2 ||
+ ioam6h->remlen != 1;
+
+ case TEST_OUT_BIT0:
+ case TEST_IN_BIT0:
+ case TEST_OUT_BIT1:
+ case TEST_IN_BIT1:
+ case TEST_OUT_BIT2:
+ case TEST_IN_BIT2:
+ case TEST_OUT_BIT3:
+ case TEST_IN_BIT3:
+ case TEST_OUT_BIT4:
+ case TEST_IN_BIT4:
+ case TEST_OUT_BIT5:
+ case TEST_IN_BIT5:
+ case TEST_OUT_BIT6:
+ case TEST_IN_BIT6:
+ case TEST_OUT_BIT7:
+ case TEST_IN_BIT7:
+ case TEST_OUT_BIT11:
+ case TEST_IN_BIT11:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 1 ||
+ ioam6h->remlen;
+
+ case TEST_OUT_BIT8:
+ case TEST_IN_BIT8:
+ case TEST_OUT_BIT9:
+ case TEST_IN_BIT9:
+ case TEST_OUT_BIT10:
+ case TEST_IN_BIT10:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 2 ||
+ ioam6h->remlen;
+
+ case TEST_OUT_BIT22:
+ case TEST_IN_BIT22:
+ return ioam6h->overflow ||
+ ioam6h->nodelen ||
+ ioam6h->remlen;
+
+ case TEST_OUT_FULL_SUPP_TRACE:
+ case TEST_IN_FULL_SUPP_TRACE:
+ case TEST_FWD_FULL_SUPP_TRACE:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 15 ||
+ ioam6h->remlen;
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h,
+ const struct ioam_config cnf)
+{
+ unsigned int len;
+ __u8 aligned;
+ __u64 raw64;
+ __u32 raw32;
+
+ if (ioam6h->type.bit0) {
+ raw32 = __be32_to_cpu(*((__u32 *)*p));
+ if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff))
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit1) {
+ raw32 = __be32_to_cpu(*((__u32 *)*p));
+ if (cnf.ingr_id != (raw32 >> 16) ||
+ cnf.egr_id != (raw32 & 0xffff))
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit2)
+ *p += sizeof(__u32);
+
+ if (ioam6h->type.bit3)
+ *p += sizeof(__u32);
+
+ if (ioam6h->type.bit4) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit5) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ns_data)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit6) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit7) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit8) {
+ raw64 = __be64_to_cpu(*((__u64 *)*p));
+ if (cnf.hlim != (raw64 >> 56) ||
+ cnf.wide != (raw64 & 0xffffffffffffff))
+ return 1;
+ *p += sizeof(__u64);
+ }
+
+ if (ioam6h->type.bit9) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ingr_wide)
+ return 1;
+ *p += sizeof(__u32);
+
+ if (__be32_to_cpu(*((__u32 *)*p)) != cnf.egr_wide)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit10) {
+ if (__be64_to_cpu(*((__u64 *)*p)) != cnf.ns_wide)
+ return 1;
+ *p += sizeof(__u64);
+ }
+
+ if (ioam6h->type.bit11) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit12) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit13) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit14) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit15) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit16) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit17) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit18) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit19) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit20) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit21) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit22) {
+ len = cnf.sc_data ? strlen(cnf.sc_data) : 0;
+ aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0;
+
+ raw32 = __be32_to_cpu(*((__u32 *)*p));
+ if (aligned != (raw32 >> 24) * 4 ||
+ cnf.sc_id != (raw32 & 0xffffff))
+ return 1;
+ *p += sizeof(__u32);
+
+ if (cnf.sc_data) {
+ if (strncmp((char *)*p, cnf.sc_data, len))
+ return 1;
+
+ *p += len;
+ aligned -= len;
+
+ while (aligned--) {
+ if (**p != '\0')
+ return 1;
+ *p += sizeof(__u8);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int check_ioam_header_and_data(int tid, struct ioam6_trace_hdr *ioam6h,
+ __u32 trace_type, __u16 ioam_ns)
+{
+ __u8 *p;
+
+ if (check_ioam_header(tid, ioam6h, trace_type, ioam_ns))
+ return 1;
+
+ p = ioam6h->data + ioam6h->remlen * 4;
+
+ switch (tid) {
+ case TEST_OUT_BIT0:
+ case TEST_OUT_BIT1:
+ case TEST_OUT_BIT2:
+ case TEST_OUT_BIT3:
+ case TEST_OUT_BIT4:
+ case TEST_OUT_BIT5:
+ case TEST_OUT_BIT6:
+ case TEST_OUT_BIT7:
+ case TEST_OUT_BIT8:
+ case TEST_OUT_BIT9:
+ case TEST_OUT_BIT10:
+ case TEST_OUT_BIT11:
+ case TEST_OUT_BIT22:
+ case TEST_OUT_FULL_SUPP_TRACE:
+ return check_ioam6_data(&p, ioam6h, node1);
+
+ case TEST_IN_BIT0:
+ case TEST_IN_BIT1:
+ case TEST_IN_BIT2:
+ case TEST_IN_BIT3:
+ case TEST_IN_BIT4:
+ case TEST_IN_BIT5:
+ case TEST_IN_BIT6:
+ case TEST_IN_BIT7:
+ case TEST_IN_BIT8:
+ case TEST_IN_BIT9:
+ case TEST_IN_BIT10:
+ case TEST_IN_BIT11:
+ case TEST_IN_BIT22:
+ case TEST_IN_FULL_SUPP_TRACE:
+ {
+ __u32 tmp32 = node2.egr_wide;
+ __u16 tmp16 = node2.egr_id;
+ int res;
+
+ node2.egr_id = 0xffff;
+ node2.egr_wide = 0xffffffff;
+
+ res = check_ioam6_data(&p, ioam6h, node2);
+
+ node2.egr_id = tmp16;
+ node2.egr_wide = tmp32;
+
+ return res;
+ }
+
+ case TEST_FWD_FULL_SUPP_TRACE:
+ if (check_ioam6_data(&p, ioam6h, node3))
+ return 1;
+ if (check_ioam6_data(&p, ioam6h, node2))
+ return 1;
+ return check_ioam6_data(&p, ioam6h, node1);
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+static int str2id(const char *tname)
+{
+ if (!strcmp("out_undef_ns", tname))
+ return TEST_OUT_UNDEF_NS;
+ if (!strcmp("out_no_room", tname))
+ return TEST_OUT_NO_ROOM;
+ if (!strcmp("out_bit0", tname))
+ return TEST_OUT_BIT0;
+ if (!strcmp("out_bit1", tname))
+ return TEST_OUT_BIT1;
+ if (!strcmp("out_bit2", tname))
+ return TEST_OUT_BIT2;
+ if (!strcmp("out_bit3", tname))
+ return TEST_OUT_BIT3;
+ if (!strcmp("out_bit4", tname))
+ return TEST_OUT_BIT4;
+ if (!strcmp("out_bit5", tname))
+ return TEST_OUT_BIT5;
+ if (!strcmp("out_bit6", tname))
+ return TEST_OUT_BIT6;
+ if (!strcmp("out_bit7", tname))
+ return TEST_OUT_BIT7;
+ if (!strcmp("out_bit8", tname))
+ return TEST_OUT_BIT8;
+ if (!strcmp("out_bit9", tname))
+ return TEST_OUT_BIT9;
+ if (!strcmp("out_bit10", tname))
+ return TEST_OUT_BIT10;
+ if (!strcmp("out_bit11", tname))
+ return TEST_OUT_BIT11;
+ if (!strcmp("out_bit22", tname))
+ return TEST_OUT_BIT22;
+ if (!strcmp("out_full_supp_trace", tname))
+ return TEST_OUT_FULL_SUPP_TRACE;
+ if (!strcmp("in_undef_ns", tname))
+ return TEST_IN_UNDEF_NS;
+ if (!strcmp("in_no_room", tname))
+ return TEST_IN_NO_ROOM;
+ if (!strcmp("in_oflag", tname))
+ return TEST_IN_OFLAG;
+ if (!strcmp("in_bit0", tname))
+ return TEST_IN_BIT0;
+ if (!strcmp("in_bit1", tname))
+ return TEST_IN_BIT1;
+ if (!strcmp("in_bit2", tname))
+ return TEST_IN_BIT2;
+ if (!strcmp("in_bit3", tname))
+ return TEST_IN_BIT3;
+ if (!strcmp("in_bit4", tname))
+ return TEST_IN_BIT4;
+ if (!strcmp("in_bit5", tname))
+ return TEST_IN_BIT5;
+ if (!strcmp("in_bit6", tname))
+ return TEST_IN_BIT6;
+ if (!strcmp("in_bit7", tname))
+ return TEST_IN_BIT7;
+ if (!strcmp("in_bit8", tname))
+ return TEST_IN_BIT8;
+ if (!strcmp("in_bit9", tname))
+ return TEST_IN_BIT9;
+ if (!strcmp("in_bit10", tname))
+ return TEST_IN_BIT10;
+ if (!strcmp("in_bit11", tname))
+ return TEST_IN_BIT11;
+ if (!strcmp("in_bit22", tname))
+ return TEST_IN_BIT22;
+ if (!strcmp("in_full_supp_trace", tname))
+ return TEST_IN_FULL_SUPP_TRACE;
+ if (!strcmp("fwd_full_supp_trace", tname))
+ return TEST_FWD_FULL_SUPP_TRACE;
+
+ return -1;
+}
+
+static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2)
+{
+ return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) |
+ (a1->s6_addr32[1] ^ a2->s6_addr32[1]) |
+ (a1->s6_addr32[2] ^ a2->s6_addr32[2]) |
+ (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0;
+}
+
+static int get_u32(__u32 *val, const char *arg, int base)
+{
+ unsigned long res;
+ char *ptr;
+
+ if (!arg || !*arg)
+ return -1;
+ res = strtoul(arg, &ptr, base);
+
+ if (!ptr || ptr == arg || *ptr)
+ return -1;
+
+ if (res == ULONG_MAX && errno == ERANGE)
+ return -1;
+
+ if (res > 0xFFFFFFFFUL)
+ return -1;
+
+ *val = res;
+ return 0;
+}
+
+static int get_u16(__u16 *val, const char *arg, int base)
+{
+ unsigned long res;
+ char *ptr;
+
+ if (!arg || !*arg)
+ return -1;
+ res = strtoul(arg, &ptr, base);
+
+ if (!ptr || ptr == arg || *ptr)
+ return -1;
+
+ if (res == ULONG_MAX && errno == ERANGE)
+ return -1;
+
+ if (res > 0xFFFFUL)
+ return -1;
+
+ *val = res;
+ return 0;
+}
+
+static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = {
+ [TEST_OUT_UNDEF_NS] = check_ioam_header,
+ [TEST_OUT_NO_ROOM] = check_ioam_header,
+ [TEST_OUT_BIT0] = check_ioam_header_and_data,
+ [TEST_OUT_BIT1] = check_ioam_header_and_data,
+ [TEST_OUT_BIT2] = check_ioam_header_and_data,
+ [TEST_OUT_BIT3] = check_ioam_header_and_data,
+ [TEST_OUT_BIT4] = check_ioam_header_and_data,
+ [TEST_OUT_BIT5] = check_ioam_header_and_data,
+ [TEST_OUT_BIT6] = check_ioam_header_and_data,
+ [TEST_OUT_BIT7] = check_ioam_header_and_data,
+ [TEST_OUT_BIT8] = check_ioam_header_and_data,
+ [TEST_OUT_BIT9] = check_ioam_header_and_data,
+ [TEST_OUT_BIT10] = check_ioam_header_and_data,
+ [TEST_OUT_BIT11] = check_ioam_header_and_data,
+ [TEST_OUT_BIT22] = check_ioam_header_and_data,
+ [TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data,
+ [TEST_IN_UNDEF_NS] = check_ioam_header,
+ [TEST_IN_NO_ROOM] = check_ioam_header,
+ [TEST_IN_OFLAG] = check_ioam_header,
+ [TEST_IN_BIT0] = check_ioam_header_and_data,
+ [TEST_IN_BIT1] = check_ioam_header_and_data,
+ [TEST_IN_BIT2] = check_ioam_header_and_data,
+ [TEST_IN_BIT3] = check_ioam_header_and_data,
+ [TEST_IN_BIT4] = check_ioam_header_and_data,
+ [TEST_IN_BIT5] = check_ioam_header_and_data,
+ [TEST_IN_BIT6] = check_ioam_header_and_data,
+ [TEST_IN_BIT7] = check_ioam_header_and_data,
+ [TEST_IN_BIT8] = check_ioam_header_and_data,
+ [TEST_IN_BIT9] = check_ioam_header_and_data,
+ [TEST_IN_BIT10] = check_ioam_header_and_data,
+ [TEST_IN_BIT11] = check_ioam_header_and_data,
+ [TEST_IN_BIT22] = check_ioam_header_and_data,
+ [TEST_IN_FULL_SUPP_TRACE] = check_ioam_header_and_data,
+ [TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data,
+};
+
+int main(int argc, char **argv)
+{
+ int fd, size, hoplen, tid, ret = 1;
+ struct in6_addr src, dst;
+ struct ioam6_hdr *opt;
+ struct ipv6hdr *ip6h;
+ __u8 buffer[400], *p;
+ __u16 ioam_ns;
+ __u32 tr_type;
+
+ if (argc != 7)
+ goto out;
+
+ tid = str2id(argv[2]);
+ if (tid < 0 || !func[tid])
+ goto out;
+
+ if (inet_pton(AF_INET6, argv[3], &src) != 1 ||
+ inet_pton(AF_INET6, argv[4], &dst) != 1)
+ goto out;
+
+ if (get_u32(&tr_type, argv[5], 16) ||
+ get_u16(&ioam_ns, argv[6], 0))
+ goto out;
+
+ fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6));
+ if (!fd)
+ goto out;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+ argv[1], strlen(argv[1])))
+ goto close;
+
+recv:
+ size = recv(fd, buffer, sizeof(buffer), 0);
+ if (size <= 0)
+ goto close;
+
+ ip6h = (struct ipv6hdr *)buffer;
+
+ if (!ipv6_addr_equal(&ip6h->saddr, &src) ||
+ !ipv6_addr_equal(&ip6h->daddr, &dst))
+ goto recv;
+
+ if (ip6h->nexthdr != IPPROTO_HOPOPTS)
+ goto close;
+
+ p = buffer + sizeof(*ip6h);
+ hoplen = (p[1] + 1) << 3;
+ p += sizeof(struct ipv6_hopopt_hdr);
+
+ while (hoplen > 0) {
+ opt = (struct ioam6_hdr *)p;
+
+ if (opt->opt_type == IPV6_TLV_IOAM &&
+ opt->type == IOAM6_TYPE_PREALLOC) {
+ p += sizeof(*opt);
+ ret = func[tid](tid, (struct ioam6_trace_hdr *)p,
+ tr_type, ioam_ns);
+ break;
+ }
+
+ p += opt->opt_len + 2;
+ hoplen -= opt->opt_len + 2;
+ }
+close:
+ close(fd);
+out:
+ return ret;
+}
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index f23438d512c5..3d7dde2c321b 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -484,13 +484,16 @@ enum desc_type {
MONITOR_ACQUIRE,
EXPIRE_STATE,
EXPIRE_POLICY,
+ SPDINFO_ATTRS,
};
const char *desc_name[] = {
"create tunnel",
"alloc spi",
"monitor acquire",
"expire state",
- "expire policy"
+ "expire policy",
+ "spdinfo attributes",
+ ""
};
struct xfrm_desc {
enum desc_type type;
@@ -1593,6 +1596,155 @@ out_close:
return ret;
}
+static int xfrm_spdinfo_set_thresh(int xfrm_sock, uint32_t *seq,
+ unsigned thresh4_l, unsigned thresh4_r,
+ unsigned thresh6_l, unsigned thresh6_r,
+ bool add_bad_attr)
+
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ uint32_t unused;
+ int error;
+ };
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ struct xfrmu_spdhthresh thresh;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused));
+ req.nh.nlmsg_type = XFRM_MSG_NEWSPDINFO;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ thresh.lbits = thresh4_l;
+ thresh.rbits = thresh4_r;
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, &thresh, sizeof(thresh)))
+ return -1;
+
+ thresh.lbits = thresh6_l;
+ thresh.rbits = thresh6_r;
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, &thresh, sizeof(thresh)))
+ return -1;
+
+ if (add_bad_attr) {
+ BUILD_BUG_ON(XFRMA_IF_ID <= XFRMA_SPD_MAX + 1);
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) {
+ pr_err("adding attribute failed: no space");
+ return -1;
+ }
+ }
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ return -1;
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ return -1;
+ }
+
+ if (req.error) {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_spdinfo_attrs(int xfrm_sock, uint32_t *seq)
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ uint32_t unused;
+ int error;
+ };
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 31, 120, 16, false)) {
+ pr_err("Can't set SPD HTHRESH");
+ return KSFT_FAIL;
+ }
+
+ memset(&req, 0, sizeof(req));
+
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused));
+ req.nh.nlmsg_type = XFRM_MSG_GETSPDINFO;
+ req.nh.nlmsg_flags = NLM_F_REQUEST;
+ req.nh.nlmsg_seq = (*seq)++;
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return KSFT_FAIL;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ return KSFT_FAIL;
+ } else if (req.nh.nlmsg_type == XFRM_MSG_NEWSPDINFO) {
+ size_t len = NLMSG_PAYLOAD(&req.nh, sizeof(req.unused));
+ struct rtattr *attr = (void *)req.attrbuf;
+ int got_thresh = 0;
+
+ for (; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) {
+ if (attr->rta_type == XFRMA_SPD_IPV4_HTHRESH) {
+ struct xfrmu_spdhthresh *t = RTA_DATA(attr);
+
+ got_thresh++;
+ if (t->lbits != 32 || t->rbits != 31) {
+ pr_err("thresh differ: %u, %u",
+ t->lbits, t->rbits);
+ return KSFT_FAIL;
+ }
+ }
+ if (attr->rta_type == XFRMA_SPD_IPV6_HTHRESH) {
+ struct xfrmu_spdhthresh *t = RTA_DATA(attr);
+
+ got_thresh++;
+ if (t->lbits != 120 || t->rbits != 16) {
+ pr_err("thresh differ: %u, %u",
+ t->lbits, t->rbits);
+ return KSFT_FAIL;
+ }
+ }
+ }
+ if (got_thresh != 2) {
+ pr_err("only %d thresh returned by XFRM_MSG_GETSPDINFO", got_thresh);
+ return KSFT_FAIL;
+ }
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ return KSFT_FAIL;
+ } else {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ return -1;
+ }
+
+ /* Restore the default */
+ if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, false)) {
+ pr_err("Can't restore SPD HTHRESH");
+ return KSFT_FAIL;
+ }
+
+ /*
+ * At this moment xfrm uses nlmsg_parse_deprecated(), which
+ * implies NL_VALIDATE_LIBERAL - ignoring attributes with
+ * (type > maxtype). nla_parse_depricated_strict() would enforce
+ * it. Or even stricter nla_parse().
+ * Right now it's not expected to fail, but to be ignored.
+ */
+ if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, true))
+ return KSFT_PASS;
+
+ return KSFT_PASS;
+}
+
static int child_serv(int xfrm_sock, uint32_t *seq,
unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc)
{
@@ -1717,6 +1869,9 @@ static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf)
case EXPIRE_POLICY:
ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc);
break;
+ case SPDINFO_ATTRS:
+ ret = xfrm_spdinfo_attrs(xfrm_sock, &seq);
+ break;
default:
printk("Unknown desc type %d", desc.type);
exit(KSFT_FAIL);
@@ -1994,8 +2149,10 @@ static int write_proto_plan(int fd, int proto)
* sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176
*
* Check the affected by the UABI difference structures.
+ * Also, check translation for xfrm_set_spdinfo: it has it's own attributes
+ * which needs to be correctly copied, but not translated.
*/
-const unsigned int compat_plan = 4;
+const unsigned int compat_plan = 5;
static int write_compat_struct_tests(int test_desc_fd)
{
struct xfrm_desc desc = {};
@@ -2019,6 +2176,10 @@ static int write_compat_struct_tests(int test_desc_fd)
if (__write_desc(test_desc_fd, &desc))
return -1;
+ desc.type = SPDINFO_ATTRS;
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
return 0;
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index d88e1fdfb147..89c4753c2760 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -6,6 +6,7 @@
#include <limits.h>
#include <fcntl.h>
#include <string.h>
+#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
@@ -25,6 +26,7 @@
#include <netinet/in.h>
#include <linux/tcp.h>
+#include <linux/time_types.h>
extern int optind;
@@ -66,6 +68,13 @@ static unsigned int cfg_do_w;
static int cfg_wait;
static uint32_t cfg_mark;
+struct cfg_cmsg_types {
+ unsigned int cmsg_enabled:1;
+ unsigned int timestampns:1;
+};
+
+static struct cfg_cmsg_types cfg_cmsg_types;
+
static void die_usage(void)
{
fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
@@ -80,11 +89,22 @@ static void die_usage(void)
fprintf(stderr, "\t-M mark -- set socket packet mark\n");
fprintf(stderr, "\t-u -- check mptcp ulp\n");
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
+ fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
fprintf(stderr,
"\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
exit(1);
}
+static void xerror(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ exit(1);
+}
+
static void handle_signal(int nr)
{
quit = true;
@@ -338,6 +358,58 @@ static size_t do_write(const int fd, char *buf, const size_t len)
return offset;
}
+static void process_cmsg(struct msghdr *msgh)
+{
+ struct __kernel_timespec ts;
+ bool ts_found = false;
+ struct cmsghdr *cmsg;
+
+ for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) {
+ memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts));
+ ts_found = true;
+ continue;
+ }
+ }
+
+ if (cfg_cmsg_types.timestampns) {
+ if (!ts_found)
+ xerror("TIMESTAMPNS not present\n");
+ }
+}
+
+static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len)
+{
+ char msg_buf[8192];
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = len,
+ };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = msg_buf,
+ .msg_controllen = sizeof(msg_buf),
+ };
+ int flags = 0;
+ int ret = recvmsg(fd, &msg, flags);
+
+ if (ret <= 0)
+ return ret;
+
+ if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled)
+ xerror("got %lu bytes of cmsg data, expected 0\n",
+ (unsigned long)msg.msg_controllen);
+
+ if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled)
+ xerror("%s\n", "got no cmsg data");
+
+ if (msg.msg_controllen)
+ process_cmsg(&msg);
+
+ return ret;
+}
+
static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
{
int ret = 0;
@@ -357,6 +429,8 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
} else if (cfg_peek == CFG_AFTER_PEEK) {
ret = recv(fd, buf, cap, MSG_PEEK);
ret = (ret < 0) ? ret : read(fd, buf, cap);
+ } else if (cfg_cmsg_types.cmsg_enabled) {
+ ret = do_recvmsg_cmsg(fd, buf, cap);
} else {
ret = read(fd, buf, cap);
}
@@ -786,6 +860,48 @@ static void init_rng(void)
srand(foo);
}
+static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen)
+{
+ int err;
+
+ err = setsockopt(fd, level, optname, optval, optlen);
+ if (err) {
+ perror("setsockopt");
+ exit(1);
+ }
+}
+
+static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg)
+{
+ static const unsigned int on = 1;
+
+ if (cmsg->timestampns)
+ xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on));
+}
+
+static void parse_cmsg_types(const char *type)
+{
+ char *next = strchr(type, ',');
+ unsigned int len = 0;
+
+ cfg_cmsg_types.cmsg_enabled = 1;
+
+ if (next) {
+ parse_cmsg_types(next + 1);
+ len = next - type;
+ } else {
+ len = strlen(type);
+ }
+
+ if (strncmp(type, "TIMESTAMPNS", len) == 0) {
+ cfg_cmsg_types.timestampns = 1;
+ return;
+ }
+
+ fprintf(stderr, "Unrecognized cmsg option %s\n", type);
+ exit(1);
+}
+
int main_loop(void)
{
int fd;
@@ -801,6 +917,8 @@ int main_loop(void)
set_rcvbuf(fd, cfg_rcvbuf);
if (cfg_sndbuf)
set_sndbuf(fd, cfg_sndbuf);
+ if (cfg_cmsg_types.cmsg_enabled)
+ apply_cmsg_types(fd, &cfg_cmsg_types);
return copyfd_io(0, fd, 1);
}
@@ -887,7 +1005,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:")) != -1) {
+ while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:c:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
@@ -943,6 +1061,9 @@ static void parse_opts(int argc, char **argv)
case 'P':
cfg_peek = parse_peek(optarg);
break;
+ case 'c':
+ parse_cmsg_types(optarg);
+ break;
}
}
@@ -976,6 +1097,8 @@ int main(int argc, char *argv[])
set_sndbuf(fd, cfg_sndbuf);
if (cfg_mark)
set_mark(fd, cfg_mark);
+ if (cfg_cmsg_types.cmsg_enabled)
+ apply_cmsg_types(fd, &cfg_cmsg_types);
return main_loop_s(fd);
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 2b495dc8d78e..559173a8e387 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -3,7 +3,7 @@
time_start=$(date +%s)
-optstring="S:R:d:e:l:r:h4cm:f:t"
+optstring="S:R:d:e:l:r:h4cm:f:tC"
ret=0
sin=""
sout=""
@@ -22,6 +22,7 @@ sndbuf=0
rcvbuf=0
options_log=true
do_tcp=0
+checksum=false
filesize=0
if [ $tc_loss -eq 100 ];then
@@ -47,6 +48,7 @@ usage() {
echo -e "\t-R: set rcvbuf value (default: use kernel default)"
echo -e "\t-m: test mode (poll, sendfile; default: poll)"
echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)"
+ echo -e "\t-C: enable the MPTCP data checksum"
}
while getopts "$optstring" option;do
@@ -104,6 +106,9 @@ while getopts "$optstring" option;do
"t")
do_tcp=$((do_tcp+1))
;;
+ "C")
+ checksum=true
+ ;;
"?")
usage $0
exit 1
@@ -197,6 +202,12 @@ ip -net "$ns4" link set ns4eth3 up
ip -net "$ns4" route add default via 10.0.3.2
ip -net "$ns4" route add default via dead:beef:3::2
+if $checksum; then
+ for i in "$ns1" "$ns2" "$ns3" "$ns4";do
+ ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1
+ done
+fi
+
set_ethtool_flags() {
local ns="$1"
local dev="$2"
@@ -669,6 +680,25 @@ run_tests_peekmode()
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
}
+display_time()
+{
+ time_end=$(date +%s)
+ time_run=$((time_end-time_start))
+
+ echo "Time: ${time_run} seconds"
+}
+
+stop_if_error()
+{
+ local msg="$1"
+
+ if [ ${ret} -ne 0 ]; then
+ echo "FAIL: ${msg}" 1>&2
+ display_time
+ exit ${ret}
+ fi
+}
+
make_file "$cin" "client"
make_file "$sin" "server"
@@ -676,6 +706,8 @@ check_mptcp_disabled
check_mptcp_ulp_setsockopt
+stop_if_error "The kernel configuration is not valid for MPTCP"
+
echo "INFO: validating network environment with pings"
for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
do_ping "$ns1" $sender 10.0.1.1
@@ -695,6 +727,8 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
do_ping "$ns4" $sender dead:beef:3::1
done
+stop_if_error "Could not even run ping tests"
+
[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
echo -n "INFO: Using loss of $tc_loss "
test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
@@ -722,18 +756,13 @@ echo "on ns3eth4"
tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
-for sender in $ns1 $ns2 $ns3 $ns4;do
- run_tests_lo "$ns1" "$sender" 10.0.1.1 1
- if [ $ret -ne 0 ] ;then
- echo "FAIL: Could not even run loopback test" 1>&2
- exit $ret
- fi
- run_tests_lo "$ns1" $sender dead:beef:1::1 1
- if [ $ret -ne 0 ] ;then
- echo "FAIL: Could not even run loopback v6 test" 2>&1
- exit $ret
- fi
+run_tests_lo "$ns1" "$ns1" 10.0.1.1 1
+stop_if_error "Could not even run loopback test"
+run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1
+stop_if_error "Could not even run loopback v6 test"
+
+for sender in $ns1 $ns2 $ns3 $ns4;do
# ns1<->ns2 is not subject to reordering/tc delays. Use it to test
# mptcp syncookie support.
if [ $sender = $ns1 ]; then
@@ -742,6 +771,9 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1
fi
+ run_tests "$ns1" $sender 10.0.1.1
+ run_tests "$ns1" $sender dead:beef:1::1
+
run_tests "$ns2" $sender 10.0.1.2
run_tests "$ns2" $sender dead:beef:1::2
run_tests "$ns2" $sender 10.0.2.1
@@ -754,14 +786,13 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
run_tests "$ns4" $sender 10.0.3.1
run_tests "$ns4" $sender dead:beef:3::1
+
+ stop_if_error "Tests with $sender as a sender have failed"
done
run_tests_peekmode "saveWithPeek"
run_tests_peekmode "saveAfterPeek"
+stop_if_error "Tests with peek mode have failed"
-time_end=$(date +%s)
-time_run=$((time_end-time_start))
-
-echo "Time: ${time_run} seconds"
-
+display_time
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index fd99485cf2a4..255793c5ac4f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -3,8 +3,10 @@
ret=0
sin=""
+sinfail=""
sout=""
cin=""
+cinfail=""
cinsent=""
cout=""
ksft_skip=4
@@ -12,6 +14,7 @@ timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
mptcp_connect=""
capture=0
+checksum=0
do_all_tests=1
TEST_COUNT=0
@@ -49,6 +52,9 @@ init()
ip netns exec $netns sysctl -q net.mptcp.enabled=1
ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
+ if [ $checksum -eq 1 ]; then
+ ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1
+ fi
done
# ns1 ns2
@@ -72,6 +78,14 @@ init()
done
}
+init_shapers()
+{
+ for i in `seq 1 4`; do
+ tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1
+ tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1
+ done
+}
+
cleanup_partial()
{
rm -f "$capout"
@@ -84,8 +98,8 @@ cleanup_partial()
cleanup()
{
- rm -f "$cin" "$cout"
- rm -f "$sin" "$sout" "$cinsent"
+ rm -f "$cin" "$cout" "$sinfail"
+ rm -f "$sin" "$sout" "$cinsent" "$cinfail"
cleanup_partial
}
@@ -124,6 +138,28 @@ reset_with_add_addr_timeout()
-j DROP
}
+reset_with_checksum()
+{
+ local ns1_enable=$1
+ local ns2_enable=$2
+
+ reset
+
+ ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
+ ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
+}
+
+reset_with_allow_join_id0()
+{
+ local ns1_enable=$1
+ local ns2_enable=$2
+
+ reset
+
+ ip netns exec $ns1 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns1_enable
+ ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
+}
+
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
@@ -185,11 +221,15 @@ link_failure()
{
ns="$1"
- l=$((RANDOM%4))
- l=$((l+1))
+ if [ -z "$FAILING_LINKS" ]; then
+ l=$((RANDOM%4))
+ FAILING_LINKS=$((l+1))
+ fi
- veth="ns1eth$l"
- ip -net "$ns" link set "$veth" down
+ for l in $FAILING_LINKS; do
+ veth="ns1eth$l"
+ ip -net "$ns" link set "$veth" down
+ done
}
# $1: IP address
@@ -254,10 +294,17 @@ do_transfer()
local_addr="0.0.0.0"
fi
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
- ${local_addr} < "$sin" > "$sout" &
+ if [ "$test_link_fail" -eq 2 ];then
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ $mptcp_connect -t ${timeout_poll} -l -p $port -s ${cl_proto} \
+ ${local_addr} < "$sinfail" > "$sout" &
+ else
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ ${local_addr} < "$sin" > "$sout" &
+ fi
spid=$!
sleep 1
@@ -268,7 +315,7 @@ do_transfer()
$mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
$connect_addr < "$cin" > "$cout" &
else
- ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | \
+ ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
tee "$cinsent" | \
timeout ${timeout_test} \
ip netns exec ${connector_ns} \
@@ -297,17 +344,18 @@ do_transfer()
let rm_nr_ns1=-addr_nr_ns1
if [ $rm_nr_ns1 -lt 8 ]; then
counter=1
+ pos=1
dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
if [ ${#dump[@]} -gt 0 ]; then
- id=${dump[1]}
sleep 1
while [ $counter -le $rm_nr_ns1 ]
do
+ id=${dump[$pos]}
ip netns exec ${listener_ns} ./pm_nl_ctl del $id
sleep 1
let counter+=1
- let id+=1
+ let pos+=5
done
fi
elif [ $rm_nr_ns1 -eq 8 ]; then
@@ -319,6 +367,12 @@ do_transfer()
fi
fi
+ flags="subflow"
+ if [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then
+ flags="${flags},fullmesh"
+ addr_nr_ns2=${addr_nr_ns2:9}
+ fi
+
if [ $addr_nr_ns2 -gt 0 ]; then
let add_nr_ns2=addr_nr_ns2
counter=3
@@ -330,7 +384,7 @@ do_transfer()
else
addr="10.0.$counter.2"
fi
- ip netns exec $ns2 ./pm_nl_ctl add $addr flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add $addr flags $flags
let counter+=1
let add_nr_ns2-=1
done
@@ -339,17 +393,18 @@ do_transfer()
let rm_nr_ns2=-addr_nr_ns2
if [ $rm_nr_ns2 -lt 8 ]; then
counter=1
+ pos=1
dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
if [ ${#dump[@]} -gt 0 ]; then
- id=${dump[1]}
sleep 1
while [ $counter -le $rm_nr_ns2 ]
do
+ id=${dump[$pos]}
ip netns exec ${connector_ns} ./pm_nl_ctl del $id
sleep 1
let counter+=1
- let id+=1
+ let pos+=5
done
fi
elif [ $rm_nr_ns2 -eq 8 ]; then
@@ -408,7 +463,11 @@ do_transfer()
return 1
fi
- check_transfer $sin $cout "file received by client"
+ if [ "$test_link_fail" -eq 2 ];then
+ check_transfer $sinfail $cout "file received by client"
+ else
+ check_transfer $sin $cout "file received by client"
+ fi
retc=$?
if [ "$test_link_fail" -eq 0 ];then
check_transfer $cin $sout "file received by server"
@@ -451,28 +510,108 @@ run_tests()
lret=0
oldin=""
- if [ "$test_linkfail" -eq 1 ];then
- size=$((RANDOM%1024))
+ # create the input file for the failure test when
+ # the first failure test run
+ if [ "$test_linkfail" -ne 0 -a -z "$cinfail" ]; then
+ # the client file must be considerably larger
+ # of the maximum expected cwin value, or the
+ # link utilization will be not predicable
+ size=$((RANDOM%2))
size=$((size+1))
- size=$((size*128))
+ size=$((size*8192))
+ size=$((size + ( $RANDOM % 8192) ))
- oldin=$(mktemp)
- cp "$cin" "$oldin"
- make_file "$cin" "client" $size
+ cinfail=$(mktemp)
+ make_file "$cinfail" "client" $size
+ fi
+
+ if [ "$test_linkfail" -eq 2 -a -z "$sinfail" ]; then
+ size=$((RANDOM%16))
+ size=$((size+1))
+ size=$((size*2048))
+
+ sinfail=$(mktemp)
+ make_file "$sinfail" "server" $size
fi
do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${bkup}
lret=$?
+}
+
+chk_csum_nr()
+{
+ local msg=${1:-""}
+ local count
+ local dump_stats
+
+ if [ ! -z "$msg" ]; then
+ printf "%02u" "$TEST_COUNT"
+ else
+ echo -n " "
+ fi
+ printf " %-36s %s" "$msg" "sum"
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != 0 ]; then
+ echo "[fail] got $count data checksum error[s] expected 0"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+ echo -n " - csum "
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != 0 ]; then
+ echo "[fail] got $count data checksum error[s] expected 0"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
+ fi
+}
+
+chk_fail_nr()
+{
+ local mp_fail_nr_tx=$1
+ local mp_fail_nr_rx=$2
+ local count
+ local dump_stats
+
+ printf "%-39s %s" " " "ftx"
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$mp_fail_nr_tx" ]; then
+ echo "[fail] got $count MP_FAIL[s] TX expected $mp_fail_nr_tx"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
- if [ "$test_linkfail" -eq 1 ];then
- cp "$oldin" "$cin"
- rm -f "$oldin"
+ echo -n " - frx "
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$mp_fail_nr_rx" ]; then
+ echo "[fail] got $count MP_FAIL[s] RX expected $mp_fail_nr_rx"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
fi
- if [ $lret -ne 0 ]; then
- ret=$lret
- return
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
fi
}
@@ -523,6 +662,50 @@ chk_join_nr()
echo Client ns stats
ip netns exec $ns2 nstat -as | grep MPTcp
fi
+ if [ $checksum -eq 1 ]; then
+ chk_csum_nr
+ chk_fail_nr 0 0
+ fi
+}
+
+# a negative value for 'stale_max' means no upper bound:
+# for bidirectional transfer, if one peer sleep for a while
+# - as these tests do - we can have a quite high number of
+# stale/recover conversions, proportional to
+# sleep duration/ MPTCP-level RTX interval.
+chk_stale_nr()
+{
+ local ns=$1
+ local stale_min=$2
+ local stale_max=$3
+ local stale_delta=$4
+ local dump_stats
+ local stale_nr
+ local recover_nr
+
+ printf "%-39s %-18s" " " "stale"
+ stale_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowStale | awk '{print $2}'`
+ [ -z "$stale_nr" ] && stale_nr=0
+ recover_nr=`ip netns exec $ns nstat -as | grep MPTcpExtSubflowRecover | awk '{print $2}'`
+ [ -z "$recover_nr" ] && recover_nr=0
+
+ if [ $stale_nr -lt $stale_min ] ||
+ [ $stale_max -gt 0 -a $stale_nr -gt $stale_max ] ||
+ [ $((stale_nr - $recover_nr)) -ne $stale_delta ]; then
+ echo "[fail] got $stale_nr stale[s] $recover_nr recover[s], " \
+ " expected stale in range [$stale_min..$stale_max]," \
+ " stale-recover delta $stale_delta "
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+
+ if [ "${dump_stats}" = 1 ]; then
+ echo $ns stats
+ ip netns exec $ns ip -s link show
+ ip netns exec $ns nstat -as | grep MPTcp
+ fi
}
chk_add_nr()
@@ -733,6 +916,27 @@ chk_prio_nr()
fi
}
+chk_link_usage()
+{
+ local ns=$1
+ local link=$2
+ local out=$3
+ local expected_rate=$4
+ local tx_link=`ip netns exec $ns cat /sys/class/net/$link/statistics/tx_bytes`
+ local tx_total=`ls -l $out | awk '{print $5}'`
+ local tx_rate=$((tx_link * 100 / $tx_total))
+ local tolerance=5
+
+ printf "%-39s %-18s" " " "link usage"
+ if [ $tx_rate -lt $((expected_rate - $tolerance)) -o \
+ $tx_rate -gt $((expected_rate + $tolerance)) ]; then
+ echo "[fail] got $tx_rate% usage, expected $expected_rate%"
+ ret=1
+ else
+ echo "[ ok ]"
+ fi
+}
+
subflows_tests()
{
reset
@@ -850,20 +1054,101 @@ signal_address_tests()
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "signal invalid addresses" 1 1 1
chk_add_nr 3 3
+
+ # signal addresses race test
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 4 4
+ ip netns exec $ns2 ./pm_nl_ctl limits 4 4
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.1.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_add_nr 4 4
}
link_failure_tests()
{
# accept and use add_addr with additional subflows and link loss
reset
+
+ # without any b/w limit each veth could spool the packets and get
+ # them acked at xmit time, so that the corresponding subflow will
+ # have almost always no outstanding pkts, the scheduler will pick
+ # always the first subflow and we will have hard time testing
+ # active backup and link switch-over.
+ # Let's set some arbitrary (low) virtual link limits.
+ init_shapers
ip netns exec $ns1 ./pm_nl_ctl limits 0 3
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 3
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
run_tests $ns1 $ns2 10.0.1.1 1
chk_join_nr "multiple flows, signal, link failure" 3 3 3
chk_add_nr 1 1
+ chk_stale_nr $ns2 1 5 1
+
+ # accept and use add_addr with additional subflows and link loss
+ # for bidirectional transfer
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 dev ns2eth4 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 2
+ chk_join_nr "multi flows, signal, bidi, link fail" 3 3 3
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 1 -1 1
+
+ # 2 subflows plus 1 backup subflow with a lossy link, backup
+ # will never be used
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ export FAILING_LINKS="1"
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+ run_tests $ns1 $ns2 10.0.1.1 1
+ chk_join_nr "backup subflow unused, link failure" 2 2 2
+ chk_add_nr 1 1
+ chk_link_usage $ns2 ns2eth3 $cinsent 0
+
+ # 2 lossy links after half transfer, backup will get half of
+ # the traffic
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+ export FAILING_LINKS="1 2"
+ run_tests $ns1 $ns2 10.0.1.1 1
+ chk_join_nr "backup flow used, multi links fail" 2 2 2
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 2 4 2
+ chk_link_usage $ns2 ns2eth3 $cinsent 50
+
+ # use a backup subflow with the first subflow on a lossy link
+ # for bidirectional transfer
+ reset
+ init_shapers
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 dev ns2eth3 flags subflow,backup
+ run_tests $ns1 $ns2 10.0.1.1 2
+ chk_join_nr "backup flow used, bidi, link failure" 2 2 2
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 1 -1 2
+ chk_link_usage $ns2 ns2eth3 $cinsent 50
}
add_addr_timeout_tests()
@@ -1341,7 +1626,7 @@ syncookies_tests()
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr "subflows limited by server w cookies" 2 2 1
+ chk_join_nr "subflows limited by server w cookies" 2 1 1
# test signal address with cookies
reset_with_cookies
@@ -1374,6 +1659,143 @@ syncookies_tests()
chk_add_nr 1 1
}
+checksum_tests()
+{
+ # checksum test 0 0
+ reset_with_checksum 0 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 0 0"
+
+ # checksum test 1 1
+ reset_with_checksum 1 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 1 1"
+
+ # checksum test 0 1
+ reset_with_checksum 0 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 0 1"
+
+ # checksum test 1 0
+ reset_with_checksum 1 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_csum_nr "checksum test 1 0"
+}
+
+deny_join_id0_tests()
+{
+ # subflow allow join id0 ns1
+ reset_with_allow_join_id0 1 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "single subflow allow join id0 ns1" 1 1 1
+
+ # subflow allow join id0 ns2
+ reset_with_allow_join_id0 0 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "single subflow allow join id0 ns2" 0 0 0
+
+ # signal address allow join id0 ns1
+ # ADD_ADDRs are not affected by allow_join_id0 value.
+ reset_with_allow_join_id0 1 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "signal address allow join id0 ns1" 1 1 1
+ chk_add_nr 1 1
+
+ # signal address allow join id0 ns2
+ # ADD_ADDRs are not affected by allow_join_id0 value.
+ reset_with_allow_join_id0 0 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "signal address allow join id0 ns2" 1 1 1
+ chk_add_nr 1 1
+
+ # subflow and address allow join id0 ns1
+ reset_with_allow_join_id0 1 0
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "subflow and address allow join id0 1" 2 2 2
+
+ # subflow and address allow join id0 ns2
+ reset_with_allow_join_id0 0 1
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "subflow and address allow join id0 2" 1 1 1
+}
+
+fullmesh_tests()
+{
+ # fullmesh 1
+ # 2 fullmesh addrs in ns2, added before the connection,
+ # 1 non-fullmesh addr in ns1, added during the connection.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 4
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 4
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow,fullmesh
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow,fullmesh
+ run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow
+ chk_join_nr "fullmesh test 2x1" 4 4 4
+ chk_add_nr 1 1
+
+ # fullmesh 2
+ # 1 non-fullmesh addr in ns1, added before the connection,
+ # 1 fullmesh addr in ns2, added during the connection.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow
+ chk_join_nr "fullmesh test 1x1" 3 3 3
+ chk_add_nr 1 1
+
+ # fullmesh 3
+ # 1 non-fullmesh addr in ns1, added before the connection,
+ # 2 fullmesh addrs in ns2, added during the connection.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 5
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 5
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+ chk_join_nr "fullmesh test 1x2" 5 5 5
+ chk_add_nr 1 1
+
+ # fullmesh 4
+ # 1 non-fullmesh addr in ns1, added before the connection,
+ # 2 fullmesh addrs in ns2, added during the connection,
+ # limit max_subflows to 4.
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 4
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 4
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow
+ chk_join_nr "fullmesh test 1x2, limited" 4 4 4
+ chk_add_nr 1 1
+}
+
all_tests()
{
subflows_tests
@@ -1387,6 +1809,9 @@ all_tests()
backup_tests
add_addr_ports_tests
syncookies_tests
+ checksum_tests
+ deny_join_id0_tests
+ fullmesh_tests
}
usage()
@@ -1403,7 +1828,11 @@ usage()
echo " -b backup_tests"
echo " -p add_addr_ports_tests"
echo " -k syncookies_tests"
+ echo " -S checksum_tests"
+ echo " -d deny_join_id0_tests"
+ echo " -m fullmesh_tests"
echo " -c capture pcap files"
+ echo " -C enable data checksum"
echo " -h help"
}
@@ -1418,13 +1847,16 @@ make_file "$sin" "server" 1
trap cleanup EXIT
for arg in "$@"; do
- # check for "capture" arg before launching tests
+ # check for "capture/checksum" args before launching tests
if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then
capture=1
fi
+ if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"C"[0-9a-zA-Z]*$ ]]; then
+ checksum=1
+ fi
- # exception for the capture option, the rest means: a part of the tests
- if [ "${arg}" != "-c" ]; then
+ # exception for the capture/checksum options, the rest means: a part of the tests
+ if [ "${arg}" != "-c" ] && [ "${arg}" != "-C" ]; then
do_all_tests=0
fi
done
@@ -1434,7 +1866,7 @@ if [ $do_all_tests -eq 1 ]; then
exit $ret
fi
-while getopts 'fsltra64bpkch' opt; do
+while getopts 'fsltra64bpkdmchCS' opt; do
case $opt in
f)
subflows_tests
@@ -1469,8 +1901,19 @@ while getopts 'fsltra64bpkch' opt; do
k)
syncookies_tests
;;
+ S)
+ checksum_tests
+ ;;
+ d)
+ deny_join_id0_tests
+ ;;
+ m)
+ fullmesh_tests
+ ;;
c)
;;
+ C)
+ ;;
h | *)
usage
;;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 2fa13946ac04..1579e471a5e7 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -178,7 +178,7 @@ do_transfer()
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
- $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} \
+ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c TIMESTAMPNS \
${local_addr} < "$sin" > "$sout" &
spid=$!
@@ -186,7 +186,7 @@ do_transfer()
timeout ${timeout_test} \
ip netns exec ${connector_ns} \
- $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} \
+ $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c TIMESTAMPNS \
$connect_addr < "$cin" > "$cout" &
cpid=$!
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 115decfdc1ef..354784512748 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -25,7 +25,7 @@
static void syntax(char *argv[])
{
fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
- fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
+ fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
fprintf(stderr, "\tdel <id> [<ip>]\n");
fprintf(stderr, "\tget <id>\n");
fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n");
@@ -236,11 +236,18 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
flags |= MPTCP_PM_ADDR_FLAG_SIGNAL;
else if (!strcmp(tok, "backup"))
flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else if (!strcmp(tok, "fullmesh"))
+ flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
else
error(1, errno,
"unknown flag %s", argv[arg]);
}
+ if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
+ flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+ error(1, errno, "error flag fullmesh");
+ }
+
rta = (void *)(data + off);
rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
rta->rta_len = RTA_LENGTH(4);
@@ -422,6 +429,13 @@ static void print_addr(struct rtattr *attrs, int len)
printf(",");
}
+ if (flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+ printf("fullmesh");
+ flags &= ~MPTCP_PM_ADDR_FLAG_FULLMESH;
+ if (flags)
+ printf(",");
+ }
+
/* bump unknown flags, if any */
if (flags)
printf("0x%x", flags);
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 3aeef3bcb101..910d8126af8f 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -22,8 +22,8 @@ usage() {
cleanup()
{
- rm -f "$cin" "$cout"
- rm -f "$sin" "$sout"
+ rm -f "$cout" "$sout"
+ rm -f "$large" "$small"
rm -f "$capout"
local netns
@@ -60,6 +60,8 @@ setup()
for i in "$ns1" "$ns2" "$ns3";do
ip netns add $i || exit $ksft_skip
ip -net $i link set lo up
+ ip netns exec $i sysctl -q net.ipv4.conf.all.rp_filter=0
+ ip netns exec $i sysctl -q net.ipv4.conf.default.rp_filter=0
done
ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
@@ -80,7 +82,6 @@ setup()
ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
- ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0
ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index 6365c7fd1262..b599003eb5ba 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -11,9 +11,11 @@
#include <sys/socket.h>
#include <sys/wait.h>
#include <linux/tcp.h>
+#include <linux/udp.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/in.h>
+#include <netinet/ip.h>
#include <netdb.h>
#include <fcntl.h>
#include <libgen.h>
@@ -26,6 +28,11 @@
#include <unistd.h>
#include <time.h>
#include <errno.h>
+#include <getopt.h>
+
+#include <linux/xfrm.h>
+#include <linux/ipsec.h>
+#include <linux/pfkeyv2.h>
#ifndef IPV6_UNICAST_IF
#define IPV6_UNICAST_IF 76
@@ -95,6 +102,8 @@ struct sock_args {
struct sockaddr_in6 v6;
} md5_prefix;
unsigned int prefix_len;
+ /* 0: default, -1: force off, +1: force on */
+ int bind_key_ifindex;
/* expected addresses and device index for connection */
const char *expected_dev;
@@ -114,6 +123,9 @@ struct sock_args {
struct in_addr in;
struct in6_addr in6;
} expected_raddr;
+
+ /* ESP in UDP encap test */
+ int use_xfrm;
};
static int server_mode;
@@ -262,11 +274,14 @@ static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args
}
memcpy(&md5sig.tcpm_addr, addr, alen);
- if (args->ifindex) {
+ if ((args->ifindex && args->bind_key_ifindex >= 0) || args->bind_key_ifindex >= 1) {
opt = TCP_MD5SIG_EXT;
md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX;
md5sig.tcpm_ifindex = args->ifindex;
+ log_msg("TCP_MD5SIG_FLAG_IFINDEX set tcpm_ifindex=%d\n", md5sig.tcpm_ifindex);
+ } else {
+ log_msg("TCP_MD5SIG_FLAG_IFINDEX off\n", md5sig.tcpm_ifindex);
}
rc = setsockopt(sd, IPPROTO_TCP, opt, &md5sig, sizeof(md5sig));
@@ -1346,6 +1361,41 @@ static int bind_socket(int sd, struct sock_args *args)
return 0;
}
+static int config_xfrm_policy(int sd, struct sock_args *args)
+{
+ struct xfrm_userpolicy_info policy = {};
+ int type = UDP_ENCAP_ESPINUDP;
+ int xfrm_af = IP_XFRM_POLICY;
+ int level = SOL_IP;
+
+ if (args->type != SOCK_DGRAM) {
+ log_error("Invalid socket type. Only DGRAM could be used for XFRM\n");
+ return 1;
+ }
+
+ policy.action = XFRM_POLICY_ALLOW;
+ policy.sel.family = args->version;
+ if (args->version == AF_INET6) {
+ xfrm_af = IPV6_XFRM_POLICY;
+ level = SOL_IPV6;
+ }
+
+ policy.dir = XFRM_POLICY_OUT;
+ if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0)
+ return 1;
+
+ policy.dir = XFRM_POLICY_IN;
+ if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0)
+ return 1;
+
+ if (setsockopt(sd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type)) < 0) {
+ log_err_errno("Failed to set xfrm encap");
+ return 1;
+ }
+
+ return 0;
+}
+
static int lsock_init(struct sock_args *args)
{
long flags;
@@ -1389,6 +1439,11 @@ static int lsock_init(struct sock_args *args)
if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0)
log_err_errno("Failed to set close-on-exec flag");
+ if (args->use_xfrm && config_xfrm_policy(sd, args)) {
+ log_err_errno("Failed to set xfrm policy");
+ goto err;
+ }
+
out:
return sd;
@@ -1772,7 +1827,15 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
return client_status;
}
-#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6L:0:1:2:3:Fbq"
+#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq"
+#define OPT_FORCE_BIND_KEY_IFINDEX 1001
+#define OPT_NO_BIND_KEY_IFINDEX 1002
+
+static struct option long_opts[] = {
+ {"force-bind-key-ifindex", 0, 0, OPT_FORCE_BIND_KEY_IFINDEX},
+ {"no-bind-key-ifindex", 0, 0, OPT_NO_BIND_KEY_IFINDEX},
+ {0, 0, 0, 0}
+};
static void print_usage(char *prog)
{
@@ -1795,6 +1858,7 @@ static void print_usage(char *prog)
" -D|R datagram (D) / raw (R) socket (default stream)\n"
" -l addr local address to bind to in server mode\n"
" -c addr local address to bind to in client mode\n"
+ " -x configure XFRM policy on socket\n"
"\n"
" -d dev bind socket to given device name\n"
" -I dev bind socket to given device name - server mode\n"
@@ -1808,6 +1872,10 @@ static void print_usage(char *prog)
" -M password use MD5 sum protection\n"
" -X password MD5 password for client mode\n"
" -m prefix/len prefix and length to use for MD5 key\n"
+ " --no-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX off\n"
+ " --force-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX on\n"
+ " (default: only if -I is passed)\n"
+ "\n"
" -g grp multicast group (e.g., 239.1.1.1)\n"
" -i interactive mode (default is echo and terminate)\n"
"\n"
@@ -1843,7 +1911,7 @@ int main(int argc, char *argv[])
* process input args
*/
- while ((rc = getopt(argc, argv, GETOPT_STR)) != -1) {
+ while ((rc = getopt_long(argc, argv, GETOPT_STR, long_opts, NULL)) != -1) {
switch (rc) {
case 'B':
both_mode = 1;
@@ -1916,6 +1984,12 @@ int main(int argc, char *argv[])
case 'M':
args.password = optarg;
break;
+ case OPT_FORCE_BIND_KEY_IFINDEX:
+ args.bind_key_ifindex = 1;
+ break;
+ case OPT_NO_BIND_KEY_IFINDEX:
+ args.bind_key_ifindex = -1;
+ break;
case 'X':
args.client_pw = optarg;
break;
@@ -1966,6 +2040,9 @@ int main(int argc, char *argv[])
case 'q':
quiet = 1;
break;
+ case 'x':
+ args.use_xfrm = 1;
+ break;
default:
print_usage(argv[0]);
return 1;
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 64cd2e23c568..543ad7513a8e 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -118,6 +118,16 @@
# below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
# changes alone won't affect PMTU
#
+# - pmtu_vti4_udp_exception
+# Same as pmtu_vti4_exception, but using ESP-in-UDP
+#
+# - pmtu_vti4_udp_routed_exception
+# Set up vti tunnel on top of veth connected through routing namespace and
+# add xfrm states and policies with ESP-in-UDP encapsulation. Check that
+# route exception is not created if link layer MTU is not exceeded, then
+# lower MTU on second part of routed environment and check that exception
+# is created with the expected PMTU.
+#
# - pmtu_vti6_exception
# Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
# namespaces with matching endpoints. Check that route exception is
@@ -125,6 +135,13 @@
# decrease and increase MTU of tunnel, checking that route exception PMTU
# changes accordingly
#
+# - pmtu_vti6_udp_exception
+# Same as pmtu_vti6_exception, but using ESP-in-UDP
+#
+# - pmtu_vti6_udp_routed_exception
+# Same as pmtu_vti6_udp_routed_exception but with routing between vti
+# endpoints
+#
# - pmtu_vti4_default_mtu
# Set up vti4 tunnel on top of veth, in two namespaces with matching
# endpoints. Check that MTU assigned to vti interface is the MTU of the
@@ -224,6 +241,10 @@ tests="
pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1
pmtu_vti6_exception vti6: PMTU exceptions 0
pmtu_vti4_exception vti4: PMTU exceptions 0
+ pmtu_vti6_udp_exception vti6: PMTU exceptions (ESP-in-UDP) 0
+ pmtu_vti4_udp_exception vti4: PMTU exceptions (ESP-in-UDP) 0
+ pmtu_vti6_udp_routed_exception vti6: PMTU exceptions, routed (ESP-in-UDP) 0
+ pmtu_vti4_udp_routed_exception vti4: PMTU exceptions, routed (ESP-in-UDP) 0
pmtu_vti4_default_mtu vti4: default MTU assignment 0
pmtu_vti6_default_mtu vti6: default MTU assignment 0
pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0
@@ -246,7 +267,6 @@ ns_b="ip netns exec ${NS_B}"
ns_c="ip netns exec ${NS_C}"
ns_r1="ip netns exec ${NS_R1}"
ns_r2="ip netns exec ${NS_R2}"
-
# Addressing and routing for tests with routers: four network segments, with
# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
@@ -279,7 +299,6 @@ routes="
A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2
B default ${prefix6}:${b_r1}::2
"
-
USE_NH="no"
# ns family nh id destination gateway
nexthops="
@@ -326,6 +345,7 @@ dummy6_mask="64"
err_buf=
tcpdump_pids=
+nettest_pids=
err() {
err_buf="${err_buf}${1}
@@ -548,6 +568,14 @@ setup_vti6() {
setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
}
+setup_vti4routed() {
+ setup_vti 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask}
+}
+
+setup_vti6routed() {
+ setup_vti 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
+}
+
setup_vxlan_or_geneve() {
type="${1}"
a_addr="${2}"
@@ -619,18 +647,36 @@ setup_xfrm() {
proto=${1}
veth_a_addr="${2}"
veth_b_addr="${3}"
+ encap=${4}
- run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
- run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} || return 1
+ run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
- run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
- run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
+ run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
}
+setup_nettest_xfrm() {
+ which nettest >/dev/null
+ if [ $? -ne 0 ]; then
+ echo "'nettest' command not found; skipping tests"
+ return 1
+ fi
+
+ [ ${1} -eq 6 ] && proto="-6" || proto=""
+ port=${2}
+
+ run_cmd ${ns_a} nettest ${proto} -q -D -s -x -p ${port} -t 5 &
+ nettest_pids="${nettest_pids} $!"
+
+ run_cmd ${ns_b} nettest ${proto} -q -D -s -x -p ${port} -t 5 &
+ nettest_pids="${nettest_pids} $!"
+}
+
setup_xfrm4() {
setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
}
@@ -639,6 +685,26 @@ setup_xfrm6() {
setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
}
+setup_xfrm4udp() {
+ setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0"
+ setup_nettest_xfrm 4 4500
+}
+
+setup_xfrm6udp() {
+ setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0"
+ setup_nettest_xfrm 6 4500
+}
+
+setup_xfrm4udprouted() {
+ setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0"
+ setup_nettest_xfrm 4 4500
+}
+
+setup_xfrm6udprouted() {
+ setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0"
+ setup_nettest_xfrm 6 4500
+}
+
setup_routing_old() {
for i in ${routes}; do
[ "${ns}" = "" ] && ns="${i}" && continue
@@ -823,6 +889,11 @@ cleanup() {
done
tcpdump_pids=
+ for pid in ${nettest_pids}; do
+ kill ${pid}
+ done
+ nettest_pids=
+
for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
ip netns del ${n} 2> /dev/null
done
@@ -1432,6 +1503,135 @@ test_pmtu_vti6_exception() {
return ${fail}
}
+test_pmtu_vti4_udp_exception() {
+ setup namespaces veth vti4 xfrm4udp || return $ksft_skip
+ trace "${ns_a}" veth_a "${ns_b}" veth_b \
+ "${ns_a}" vti4_a "${ns_b}" vti4_b
+
+ veth_mtu=1500
+ vti_mtu=$((veth_mtu - 20))
+
+ # UDP SPI SN IV ICV pad length next header
+ esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1))
+ ping_payload=$((esp_payload_rfc4106 - 28))
+
+ mtu "${ns_a}" veth_a ${veth_mtu}
+ mtu "${ns_b}" veth_b ${veth_mtu}
+ mtu "${ns_a}" vti4_a ${vti_mtu}
+ mtu "${ns_b}" vti4_b ${vti_mtu}
+
+ # Send DF packet without exceeding link layer MTU, check that no
+ # exception is created
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+ check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
+
+ # Now exceed link layer MTU by one byte, check that exception is created
+ # with the right PMTU value
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+ check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
+}
+
+test_pmtu_vti6_udp_exception() {
+ setup namespaces veth vti6 xfrm6udp || return $ksft_skip
+ trace "${ns_a}" veth_a "${ns_b}" veth_b \
+ "${ns_a}" vti6_a "${ns_b}" vti6_b
+ fail=0
+
+ # Create route exception by exceeding link layer MTU
+ mtu "${ns_a}" veth_a 4000
+ mtu "${ns_b}" veth_b 4000
+ mtu "${ns_a}" vti6_a 5000
+ mtu "${ns_b}" vti6_b 5000
+ run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}
+
+ # Check that exception was created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+ check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1
+
+ # Decrease tunnel MTU, check for PMTU decrease in route exception
+ mtu "${ns_a}" vti6_a 3000
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+ check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1
+
+ # Increase tunnel MTU, check for PMTU increase in route exception
+ mtu "${ns_a}" vti6_a 9000
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+ check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1
+
+ return ${fail}
+}
+
+test_pmtu_vti4_udp_routed_exception() {
+ setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip
+ trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \
+ "${ns_a}" vti4_a "${ns_b}" vti4_b
+
+ veth_mtu=1500
+ vti_mtu=$((veth_mtu - 20))
+
+ # UDP SPI SN IV ICV pad length next header
+ esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1))
+ ping_payload=$((esp_payload_rfc4106 - 28))
+
+ mtu "${ns_a}" veth_A-R1 ${veth_mtu}
+ mtu "${ns_r1}" veth_R1-A ${veth_mtu}
+ mtu "${ns_b}" veth_B-R1 ${veth_mtu}
+ mtu "${ns_r1}" veth_R1-B ${veth_mtu}
+
+ mtu "${ns_a}" vti4_a ${vti_mtu}
+ mtu "${ns_b}" vti4_b ${vti_mtu}
+
+ # Send DF packet without exceeding link layer MTU, check that no
+ # exception is created
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+ check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
+
+ # Now decrease link layer MTU by 8 bytes on R1, check that exception is created
+ # with the right PMTU value
+ mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr}
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+ check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))"
+}
+
+test_pmtu_vti6_udp_routed_exception() {
+ setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip
+ trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \
+ "${ns_a}" vti6_a "${ns_b}" vti6_b
+
+ veth_mtu=1500
+ vti_mtu=$((veth_mtu - 40))
+
+ # UDP SPI SN IV ICV pad length next header
+ esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1))
+ ping_payload=$((esp_payload_rfc4106 - 48))
+
+ mtu "${ns_a}" veth_A-R1 ${veth_mtu}
+ mtu "${ns_r1}" veth_R1-A ${veth_mtu}
+ mtu "${ns_b}" veth_B-R1 ${veth_mtu}
+ mtu "${ns_r1}" veth_R1-B ${veth_mtu}
+
+ # mtu "${ns_a}" vti6_a ${vti_mtu}
+ # mtu "${ns_b}" vti6_b ${vti_mtu}
+
+ run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr}
+
+ # Check that exception was not created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+ check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
+
+ # Now decrease link layer MTU by 8 bytes on R1, check that exception is created
+ # with the right PMTU value
+ mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
+ run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr}
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+ check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))"
+
+}
+
test_pmtu_vti4_default_mtu() {
setup namespaces veth vti4 || return $ksft_skip
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index db4521335722..3653d6468c67 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -111,8 +111,8 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
static void sock_fanout_set_cbpf(int fd)
{
struct sock_filter bpf_filter[] = {
- BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80), /* ldb [80] */
- BPF_STMT(BPF_RET+BPF_A, 0), /* ret A */
+ BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 80), /* ldb [80] */
+ BPF_STMT(BPF_RET | BPF_A, 0), /* ret A */
};
struct sock_fprog bpf_prog;
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
index 170be65e0816..1cbfeb5052ec 100755
--- a/tools/testing/selftests/net/psock_snd.sh
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -86,9 +86,6 @@ echo "raw truncate hlen - 1 (expected to fail: EINVAL)"
echo "raw gso min size"
./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
-echo "raw gso min size - 1 (expected to fail)"
-(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
-
echo "raw gso max size"
./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
index 8b42e8b04e0f..a59cb6a3c4f5 100755
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -1,9 +1,12 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
if [ $(id -u) != 0 ]; then
echo $msg must be run as root >&2
- exit 0
+ exit $ksft_skip
fi
ret=0
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
new file mode 100755
index 000000000000..e57bbfbc5208
--- /dev/null
+++ b/tools/testing/selftests/net/setup_loopback.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
+readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
+readonly HARD_IRQS="$(< ${IRQ_PATH})"
+
+netdev_check_for_carrier() {
+ local -r dev="$1"
+
+ for i in {1..5}; do
+ carrier="$(cat /sys/class/net/${dev}/carrier)"
+ if [[ "${carrier}" -ne 1 ]] ; then
+ echo "carrier not ready yet..." >&2
+ sleep 1
+ else
+ echo "carrier ready" >&2
+ break
+ fi
+ done
+ echo "${carrier}"
+}
+
+# Assumes that there is no existing ipvlan device on the physical device
+setup_loopback_environment() {
+ local dev="$1"
+
+ # Fail hard if cannot turn on loopback mode for current NIC
+ ethtool -K "${dev}" loopback on || exit 1
+ sleep 1
+
+ # Check for the carrier
+ carrier=$(netdev_check_for_carrier ${dev})
+ if [[ "${carrier}" -ne 1 ]] ; then
+ echo "setup_loopback_environment failed"
+ exit 1
+ fi
+}
+
+setup_macvlan_ns(){
+ local -r link_dev="$1"
+ local -r ns_name="$2"
+ local -r ns_dev="$3"
+ local -r ns_mac="$4"
+ local -r addr="$5"
+
+ ip link add link "${link_dev}" dev "${ns_dev}" \
+ address "${ns_mac}" type macvlan
+ exit_code=$?
+ if [[ "${exit_code}" -ne 0 ]]; then
+ echo "setup_macvlan_ns failed"
+ exit $exit_code
+ fi
+
+ [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+ ip link set dev "${ns_dev}" netns "${ns_name}"
+ ip -netns "${ns_name}" link set dev "${ns_dev}" up
+ if [[ -n "${addr}" ]]; then
+ ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
+ fi
+
+ sleep 1
+}
+
+cleanup_macvlan_ns(){
+ while (( $# >= 2 )); do
+ ns_name="$1"
+ ns_dev="$2"
+ ip -netns "${ns_name}" link del dev "${ns_dev}"
+ ip netns del "${ns_name}"
+ shift 2
+ done
+}
+
+cleanup_loopback(){
+ local -r dev="$1"
+
+ ethtool -K "${dev}" loopback off
+ sleep 1
+
+ # Check for the carrier
+ carrier=$(netdev_check_for_carrier ${dev})
+ if [[ "${carrier}" -ne 1 ]] ; then
+ echo "setup_loopback_environment failed"
+ exit 1
+ fi
+}
+
+setup_interrupt() {
+ # Use timer on host to trigger the network stack
+ # Also disable device interrupt to not depend on NIC interrupt
+ # Reduce test flakiness caused by unexpected interrupts
+ echo 100000 >"${FLUSH_PATH}"
+ echo 50 >"${IRQ_PATH}"
+}
+
+setup_ns() {
+ # Set up server_ns namespace and client_ns namespace
+ setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}"
+ setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+ cleanup_macvlan_ns server_ns server client_ns client
+}
+
+setup() {
+ setup_loopback_environment "${dev}"
+ setup_interrupt
+}
+
+cleanup() {
+ cleanup_loopback "${dev}"
+
+ echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+ echo "${HARD_IRQS}" >"${IRQ_PATH}"
+}
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
new file mode 100644
index 000000000000..1003ddf7b3b2
--- /dev/null
+++ b/tools/testing/selftests/net/setup_veth.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+setup_veth_ns() {
+ local -r link_dev="$1"
+ local -r ns_name="$2"
+ local -r ns_dev="$3"
+ local -r ns_mac="$4"
+
+ [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+ echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
+ ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
+ ip -netns "${ns_name}" link set dev "${ns_dev}" up
+
+ ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off
+}
+
+setup_ns() {
+ # Set up server_ns namespace and client_ns namespace
+ ip link add name server type veth peer name client
+
+ setup_veth_ns "${dev}" server_ns server "${SERVER_MAC}"
+ setup_veth_ns "${dev}" client_ns client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+ local ns_name
+
+ for ns_name in client_ns server_ns; do
+ [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}"
+ done
+}
+
+setup() {
+ # no global init setup step needed
+ :
+}
+
+cleanup() {
+ cleanup_ns
+}
diff --git a/tools/testing/selftests/net/so_netns_cookie.c b/tools/testing/selftests/net/so_netns_cookie.c
new file mode 100644
index 000000000000..b39e87e967cd
--- /dev/null
+++ b/tools/testing/selftests/net/so_netns_cookie.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef SO_NETNS_COOKIE
+#define SO_NETNS_COOKIE 71
+#endif
+
+#define pr_err(fmt, ...) \
+ ({ \
+ fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+ __func__, __LINE__, ##__VA_ARGS__); \
+ 1; \
+ })
+
+int main(int argc, char *argvp[])
+{
+ uint64_t cookie1, cookie2;
+ socklen_t vallen;
+ int sock1, sock2;
+
+ sock1 = socket(AF_INET, SOCK_STREAM, 0);
+ if (sock1 < 0)
+ return pr_err("Unable to create TCP socket");
+
+ vallen = sizeof(cookie1);
+ if (getsockopt(sock1, SOL_SOCKET, SO_NETNS_COOKIE, &cookie1, &vallen) != 0)
+ return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)");
+
+ if (!cookie1)
+ return pr_err("SO_NETNS_COOKIE returned zero cookie");
+
+ if (unshare(CLONE_NEWNET))
+ return pr_err("unshare");
+
+ sock2 = socket(AF_INET, SOCK_STREAM, 0);
+ if (sock2 < 0)
+ return pr_err("Unable to create TCP socket");
+
+ vallen = sizeof(cookie2);
+ if (getsockopt(sock2, SOL_SOCKET, SO_NETNS_COOKIE, &cookie2, &vallen) != 0)
+ return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)");
+
+ if (!cookie2)
+ return pr_err("SO_NETNS_COOKIE returned zero cookie");
+
+ if (cookie1 == cookie2)
+ return pr_err("SO_NETNS_COOKIE returned identical cookies for distinct ns");
+
+ close(sock1);
+ close(sock2);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
new file mode 100755
index 000000000000..aebaab8ce44c
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -0,0 +1,576 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it>
+
+# This test is designed for evaluating the new SRv6 End.DT46 Behavior used for
+# implementing IPv4/IPv6 L3 VPN use cases.
+#
+# The current SRv6 code in the Linux kernel only implements SRv6 End.DT4 and
+# End.DT6 Behaviors which can be used respectively to support IPv4-in-IPv6 and
+# IPv6-in-IPv6 VPNs. With End.DT4 and End.DT6 it is not possible to create a
+# single SRv6 VPN tunnel to carry both IPv4 and IPv6 traffic.
+# The SRv6 End.DT46 Behavior implementation is meant to support the
+# decapsulation of IPv4 and IPv6 traffic coming from a single SRv6 tunnel.
+# Therefore, the SRv6 End.DT46 Behavior in the Linux kernel greatly simplifies
+# the setup and operations of SRv6 VPNs.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv4/IPv6 L3 VPN services allowing hosts to communicate with
+# each other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the
+# IPv4/IPv6 L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected
+# using the IPv4/IPv6 L3 VPN of tenant 200. Cross connection between tenant 100
+# and tenant 200 is forbidden and thus, for example, hs-t100-1 cannot reach
+# hs-t200-3 and vice versa.
+#
+# Routers rt-1 and rt-2 implement IPv4/IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT46 Behavior and c) VRF.
+#
+# To explain how an IPv4/IPv6 L3 VPN based on SRv6 works, let us briefly
+# consider an example where, within the same domain of tenant 100, the host
+# hs-t100-1 pings the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as a arp/ndp proxy.
+#
+# When the host hs-t100-1 sends an IPv6 or IPv4 packet destined to hs-t100-2,
+# the router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv6 or IPv4 packet in a IPv6 plus the
+# Segment Routing Header (SRH) packet. This packet is sent through the (IPv6)
+# core network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT46 Behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv6 or IPv4 packet. Afterwards, the packet is sent to the
+# host hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the roles of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv4/IPv6 L3 VPN for tenant 200 works exactly as the IPv4/IPv6
+# L3 VPN for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are
+# able to connect with each other.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-t100-1 netns | | hs-t100-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::1/64 | | | | cafe::2/64 | |
+# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | cafe::254/64 | | | | cafe::254/64 | |
+# | | 10.0.0.254/24 | +----------+ | | +----------+ | 10.0.0.254/24 | |
+# | +-------+-------+ | localsid | | | | localsid | +-------+-------- |
+# | | | table | | | | table | | |
+# | +----+----+ +----------+ | | +----------+ +----+----+ |
+# | | vrf-100 | | | | vrf-100 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | veth0 | | | | veth0 | |
+# | | fd00::1/64 |.|...|.| fd00::2/64 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | vrf-200 | | | | vrf-200 | |
+# | +----+----+ | | +----+----+ |
+# | | | | | |
+# | +-------+-------+ | | +-------+-------- |
+# | | veth-t200 | | | | veth-t200 | |
+# | | cafe::254/64 | | | | cafe::254/64 | |
+# | | 10.0.0.254/24 | | | | 10.0.0.254/24 | |
+# | +---------------+ rt-1 netns | | rt-2 netns +---------------- |
+# | . | | . |
+# +-----------------------------------+ +-----------------------------------+
+# . .
+# . .
+# . .
+# . .
+# +-------------------+ +-------------------+
+# | . | | . |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::3/64 | | | | cafe::4/64 | |
+# | | 10.0.0.3/24 | | | | 10.0.0.4/24 | |
+# | +-------------+ | | +-------------+ |
+# | | | |
+# | hs-t200-3 netns | | hs-t200-4 netns |
+# | | | |
+# +-------------------+ +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +--------------------------------------------------+
+# |SID |Action |
+# +--------------------------------------------------+
+# |fc00:21:100::6046|apply SRv6 End.DT46 vrftable 100|
+# +--------------------------------------------------+
+# |fc00:21:200::6046|apply SRv6 End.DT46 vrftable 200|
+# +--------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::2 |apply seg6 encap segs fc00:12:100::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t100 |
+# +---------------------------------------------------+
+# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100 |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::4 |apply seg6 encap segs fc00:12:200::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t200 |
+# +---------------------------------------------------+
+# |10.0.0.4 |apply seg6 encap segs fc00:12:200::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t200 |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +--------------------------------------------------+
+# |SID |Action |
+# +--------------------------------------------------+
+# |fc00:12:100::6046|apply SRv6 End.DT46 vrftable 100|
+# +--------------------------------------------------+
+# |fc00:12:200::6046|apply SRv6 End.DT46 vrftable 200|
+# +--------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::1 |apply seg6 encap segs fc00:21:100::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t100 |
+# +---------------------------------------------------+
+# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100 |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::3 |apply seg6 encap segs fc00:21:200::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t200 |
+# +---------------------------------------------------+
+# |10.0.0.3 |apply seg6 encap segs fc00:21:200::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t200 |
+# +---------------------------------------------------+
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ # destroy routers rt-* and hosts hs-*
+ for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+ ip netns del ${ns} || true
+ done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+ local hs=$1
+ local rt=$2
+ local tid=$3
+ local hsname=hs-t${tid}-${hs}
+ local rtname=rt-${rt}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns add ${hsname}
+
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad
+ ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ # configure the VRF for the tenant X on the router which is directly
+ # connected to the source host.
+ ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+ ip -netns ${rtname} link set vrf-${tid} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ # enslave the veth-tX interface to the vrf-X in the access router
+ ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+ ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad
+ ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+
+ # disable the rp_filter otherwise the kernel gets confused about how
+ # to route decap ipv4 packets.
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.rp_filter=0
+
+ ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ local hssrc_name=hs-t${tid}-${hssrc}
+ local hsdst_name=hs-t${tid}-${hsdst}
+ local rtsrc_name=rt-${rtsrc}
+ local rtdst_name=rt-${rtdst}
+ local rtveth=veth-t${tid}
+ local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6046
+
+ ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+ # set the encap route for encapsulating packets which arrive from the
+ # host hssrc and destined to the access router rtsrc.
+ ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+ via fd00::${rtdst} dev veth0
+
+ # set the decap route for decapsulating packets which arrive from
+ # the rtdst router and destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+ encap seg6local action End.DT46 vrftable ${tid} dev vrf-${tid}
+
+ # all sids for VPNs start with a common locator which is fc00::/16.
+ # Routes for handling the SRv6 End.DT46 behavior instances are grouped
+ # together in the 'localsid' table.
+ #
+ # NOTE: added only once
+ if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+ grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+ ip -netns ${rtdst_name} -6 rule add \
+ to ${VPN_LOCATOR_SERVICE}::/16 \
+ lookup ${LOCALSID_TABLE_ID} prio 999
+ fi
+
+ # set default routes to unreachable for both ipv4 and ipv6
+ ip -netns ${rtsrc_name} -6 route add unreachable default metric 4278198272 \
+ vrf vrf-${tid}
+
+ ip -netns ${rtsrc_name} -4 route add unreachable default metric 4278198272 \
+ vrf vrf-${tid}
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_hs 1 1 100 #args: host router tenant
+ setup_hs 2 2 100
+
+ # setup two hosts for the tenant 200
+ # - host hs-3 is directly connected to the router rt-1;
+ # - host hs-4 is directly connected to the router rt-2.
+ setup_hs 3 1 200
+ setup_hs 4 2 200
+
+ # setup the IPv4/IPv6 L3 VPN which connects the host hs-t100-1 and host
+ # hs-t100-2 within the same tenant 100.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+
+ # setup the IPv4/IPv6 L3 VPN which connects the host hs-t200-3 and host
+ # hs-t200-4 within the same tenant 200.
+ setup_vpn_config 3 1 4 2 200
+ setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+ >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ check_rt_connectivity ${rtsrc} ${rtdst}
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+
+ check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+
+}
+
+check_and_log_hs_isolation()
+{
+ local hssrc=$1
+ local tidsrc=$2
+ local hsdst=$3
+ local tiddst=$4
+
+ check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tidsrc}
+ # NOTE: ping should fail
+ log_test $? 1 "IPv6 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+
+ check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tidsrc}
+ # NOTE: ping should fail
+ log_test $? 1 "IPv4 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc=$1
+ local tid=$2
+
+ check_hs_ipv6_connectivity ${hssrc} 254 ${tid}
+ log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+
+ check_hs_ipv4_connectivity ${hssrc} 254 ${tid}
+ log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+
+}
+
+router_tests()
+{
+ log_section "IPv6 routers connectivity test"
+
+ check_and_log_rt_connectivity 1 2
+ check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+ log_section "IPv4/IPv6 connectivity test among hosts and gateway"
+
+ check_and_log_hs2gw_connectivity 1 100
+ check_and_log_hs2gw_connectivity 2 100
+
+ check_and_log_hs2gw_connectivity 3 200
+ check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+
+ check_and_log_hs_connectivity 3 4 200
+ check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+ local i
+ local j
+ local k
+ local tmp
+ local l1="1 2"
+ local l2="3 4"
+ local t1=100
+ local t2=200
+
+ log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+ for k in 0 1; do
+ for i in ${l1}; do
+ for j in ${l2}; do
+ check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+ done
+ done
+
+ # let us test the reverse path
+ tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+ tmp=${t1}; t1=${t2}; t2=${tmp}
+ done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit $ksft_skip
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
index ad7a9fc59934..1003119773e5 100755
--- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -163,6 +163,9 @@
# +---------------------------------------------------+
#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
readonly LOCALSID_TABLE_ID=90
readonly IPv6_RT_NETWORK=fd00
readonly IPv4_HS_NETWORK=10.0.0
@@ -464,18 +467,18 @@ host_vpn_isolation_tests()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
echo "SKIP: vrf sysctl does not exist"
- exit 0
+ exit $ksft_skip
fi
cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
index 68708f5e26a0..b9b06ef80d88 100755
--- a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
@@ -164,6 +164,9 @@
# +---------------------------------------------------+
#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
readonly LOCALSID_TABLE_ID=90
readonly IPv6_RT_NETWORK=fd00
readonly IPv6_HS_NETWORK=cafe
@@ -472,18 +475,18 @@ host_vpn_isolation_tests()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
echo "SKIP: vrf sysctl does not exist"
- exit 0
+ exit $ksft_skip
fi
cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c
index 21091be70688..aee631c5284e 100644
--- a/tools/testing/selftests/net/timestamping.c
+++ b/tools/testing/selftests/net/timestamping.c
@@ -47,7 +47,7 @@ static void usage(const char *error)
{
if (error)
printf("invalid option: %s\n", error);
- printf("timestamping interface option*\n\n"
+ printf("timestamping <interface> [bind_phc_index] [option]*\n\n"
"Options:\n"
" IP_MULTICAST_LOOP - looping outgoing multicasts\n"
" SO_TIMESTAMP - normal software time stamping, ms resolution\n"
@@ -58,6 +58,7 @@ static void usage(const char *error)
" SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n"
" SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n"
" SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n"
+ " SOF_TIMESTAMPING_BIND_PHC - request to bind a PHC of PTP vclock\n"
" SIOCGSTAMP - check last socket time stamp\n"
" SIOCGSTAMPNS - more accurate socket time stamp\n"
" PTPV2 - use PTPv2 messages\n");
@@ -311,7 +312,6 @@ static void recvpacket(int sock, int recvmsg_flags,
int main(int argc, char **argv)
{
- int so_timestamping_flags = 0;
int so_timestamp = 0;
int so_timestampns = 0;
int siocgstamp = 0;
@@ -325,6 +325,8 @@ int main(int argc, char **argv)
struct ifreq device;
struct ifreq hwtstamp;
struct hwtstamp_config hwconfig, hwconfig_requested;
+ struct so_timestamping so_timestamping_get = { 0, -1 };
+ struct so_timestamping so_timestamping = { 0, -1 };
struct sockaddr_in addr;
struct ip_mreq imr;
struct in_addr iaddr;
@@ -342,7 +344,12 @@ int main(int argc, char **argv)
exit(1);
}
- for (i = 2; i < argc; i++) {
+ if (argc >= 3 && sscanf(argv[2], "%d", &so_timestamping.bind_phc) == 1)
+ val = 3;
+ else
+ val = 2;
+
+ for (i = val; i < argc; i++) {
if (!strcasecmp(argv[i], "SO_TIMESTAMP"))
so_timestamp = 1;
else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS"))
@@ -356,17 +363,19 @@ int main(int argc, char **argv)
else if (!strcasecmp(argv[i], "PTPV2"))
ptpv2 = 1;
else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE"))
- so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
+ so_timestamping.flags |= SOF_TIMESTAMPING_TX_HARDWARE;
else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE"))
- so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE;
+ so_timestamping.flags |= SOF_TIMESTAMPING_TX_SOFTWARE;
else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE"))
- so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE;
+ so_timestamping.flags |= SOF_TIMESTAMPING_RX_HARDWARE;
else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE"))
- so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE;
+ so_timestamping.flags |= SOF_TIMESTAMPING_RX_SOFTWARE;
else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE"))
- so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE;
+ so_timestamping.flags |= SOF_TIMESTAMPING_SOFTWARE;
else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE"))
- so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE;
+ so_timestamping.flags |= SOF_TIMESTAMPING_RAW_HARDWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_BIND_PHC"))
+ so_timestamping.flags |= SOF_TIMESTAMPING_BIND_PHC;
else
usage(argv[i]);
}
@@ -385,10 +394,10 @@ int main(int argc, char **argv)
hwtstamp.ifr_data = (void *)&hwconfig;
memset(&hwconfig, 0, sizeof(hwconfig));
hwconfig.tx_type =
- (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ?
+ (so_timestamping.flags & SOF_TIMESTAMPING_TX_HARDWARE) ?
HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
hwconfig.rx_filter =
- (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
+ (so_timestamping.flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
ptpv2 ? HWTSTAMP_FILTER_PTP_V2_L4_SYNC :
HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE;
hwconfig_requested = hwconfig;
@@ -413,6 +422,9 @@ int main(int argc, char **argv)
sizeof(struct sockaddr_in)) < 0)
bail("bind");
+ if (setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, interface, if_len))
+ bail("bind device");
+
/* set multicast group for outgoing packets */
inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */
addr.sin_addr = iaddr;
@@ -444,10 +456,9 @@ int main(int argc, char **argv)
&enabled, sizeof(enabled)) < 0)
bail("setsockopt SO_TIMESTAMPNS");
- if (so_timestamping_flags &&
- setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING,
- &so_timestamping_flags,
- sizeof(so_timestamping_flags)) < 0)
+ if (so_timestamping.flags &&
+ setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &so_timestamping,
+ sizeof(so_timestamping)) < 0)
bail("setsockopt SO_TIMESTAMPING");
/* request IP_PKTINFO for debugging purposes */
@@ -468,14 +479,18 @@ int main(int argc, char **argv)
else
printf("SO_TIMESTAMPNS %d\n", val);
- if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) {
+ len = sizeof(so_timestamping_get);
+ if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &so_timestamping_get,
+ &len) < 0) {
printf("%s: %s\n", "getsockopt SO_TIMESTAMPING",
strerror(errno));
} else {
- printf("SO_TIMESTAMPING %d\n", val);
- if (val != so_timestamping_flags)
- printf(" not the expected value %d\n",
- so_timestamping_flags);
+ printf("SO_TIMESTAMPING flags %d, bind phc %d\n",
+ so_timestamping_get.flags, so_timestamping_get.bind_phc);
+ if (so_timestamping_get.flags != so_timestamping.flags ||
+ so_timestamping_get.bind_phc != so_timestamping.bind_phc)
+ printf(" not expected, flags %d, bind phc %d\n",
+ so_timestamping.flags, so_timestamping.bind_phc);
}
/* send packets forever every five seconds */
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 426d07875a48..97fceb9be9ed 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -25,6 +25,47 @@
#define TLS_PAYLOAD_MAX_LEN 16384
#define SOL_TLS 282
+struct tls_crypto_info_keys {
+ union {
+ struct tls12_crypto_info_aes_gcm_128 aes128;
+ struct tls12_crypto_info_chacha20_poly1305 chacha20;
+ };
+ size_t len;
+};
+
+static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type,
+ struct tls_crypto_info_keys *tls12)
+{
+ memset(tls12, 0, sizeof(*tls12));
+
+ switch (cipher_type) {
+ case TLS_CIPHER_CHACHA20_POLY1305:
+ tls12->len = sizeof(struct tls12_crypto_info_chacha20_poly1305);
+ tls12->chacha20.info.version = tls_version;
+ tls12->chacha20.info.cipher_type = cipher_type;
+ break;
+ case TLS_CIPHER_AES_GCM_128:
+ tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_128);
+ tls12->aes128.info.version = tls_version;
+ tls12->aes128.info.cipher_type = cipher_type;
+ break;
+ default:
+ break;
+ }
+}
+
+static void memrnd(void *s, size_t n)
+{
+ int *dword = s;
+ char *byte;
+
+ for (; n >= 4; n -= 4)
+ *dword++ = rand();
+ byte = (void *)dword;
+ while (n--)
+ *byte++ = rand();
+}
+
FIXTURE(tls_basic)
{
int fd, cfd;
@@ -133,33 +174,16 @@ FIXTURE_VARIANT_ADD(tls, 13_chacha)
FIXTURE_SETUP(tls)
{
- union {
- struct tls12_crypto_info_aes_gcm_128 aes128;
- struct tls12_crypto_info_chacha20_poly1305 chacha20;
- } tls12;
+ struct tls_crypto_info_keys tls12;
struct sockaddr_in addr;
socklen_t len;
int sfd, ret;
- size_t tls12_sz;
self->notls = false;
len = sizeof(addr);
- memset(&tls12, 0, sizeof(tls12));
- switch (variant->cipher_type) {
- case TLS_CIPHER_CHACHA20_POLY1305:
- tls12_sz = sizeof(struct tls12_crypto_info_chacha20_poly1305);
- tls12.chacha20.info.version = variant->tls_version;
- tls12.chacha20.info.cipher_type = variant->cipher_type;
- break;
- case TLS_CIPHER_AES_GCM_128:
- tls12_sz = sizeof(struct tls12_crypto_info_aes_gcm_128);
- tls12.aes128.info.version = variant->tls_version;
- tls12.aes128.info.cipher_type = variant->cipher_type;
- break;
- default:
- tls12_sz = 0;
- }
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type,
+ &tls12);
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(INADDR_ANY);
@@ -187,7 +211,7 @@ FIXTURE_SETUP(tls)
if (!self->notls) {
ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
- tls12_sz);
+ tls12.len);
ASSERT_EQ(ret, 0);
}
@@ -200,7 +224,7 @@ FIXTURE_SETUP(tls)
ASSERT_EQ(ret, 0);
ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
- tls12_sz);
+ tls12.len);
ASSERT_EQ(ret, 0);
}
@@ -308,6 +332,8 @@ TEST_F(tls, recv_max)
char recv_mem[TLS_PAYLOAD_MAX_LEN];
char buf[TLS_PAYLOAD_MAX_LEN];
+ memrnd(buf, sizeof(buf));
+
EXPECT_GE(send(self->fd, buf, send_len, 0), 0);
EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
EXPECT_EQ(memcmp(buf, recv_mem, send_len), 0);
@@ -418,8 +444,9 @@ TEST_F(tls, sendmsg_large)
EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len);
}
- while (recvs++ < sends)
+ while (recvs++ < sends) {
EXPECT_NE(recv(self->fd, mem, send_len, 0), -1);
+ }
free(mem);
}
@@ -588,6 +615,8 @@ TEST_F(tls, recvmsg_single_max)
struct iovec vec;
struct msghdr hdr;
+ memrnd(send_mem, sizeof(send_mem));
+
EXPECT_EQ(send(self->fd, send_mem, send_len, 0), send_len);
vec.iov_base = (char *)recv_mem;
vec.iov_len = TLS_PAYLOAD_MAX_LEN;
@@ -610,6 +639,8 @@ TEST_F(tls, recvmsg_multiple)
struct msghdr hdr;
int i;
+ memrnd(buf, sizeof(buf));
+
EXPECT_EQ(send(self->fd, buf, send_len, 0), send_len);
for (i = 0; i < msg_iovlen; i++) {
iov_base[i] = (char *)malloc(iov_len);
@@ -634,6 +665,8 @@ TEST_F(tls, single_send_multiple_recv)
char send_mem[TLS_PAYLOAD_MAX_LEN * 2];
char recv_mem[TLS_PAYLOAD_MAX_LEN * 2];
+ memrnd(send_mem, sizeof(send_mem));
+
EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0);
memset(recv_mem, 0, total_len);
@@ -834,18 +867,17 @@ TEST_F(tls, bidir)
int ret;
if (!self->notls) {
- struct tls12_crypto_info_aes_gcm_128 tls12;
+ struct tls_crypto_info_keys tls12;
- memset(&tls12, 0, sizeof(tls12));
- tls12.info.version = variant->tls_version;
- tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type,
+ &tls12);
ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12,
- sizeof(tls12));
+ tls12.len);
ASSERT_EQ(ret, 0);
ret = setsockopt(self->cfd, SOL_TLS, TLS_TX, &tls12,
- sizeof(tls12));
+ tls12.len);
ASSERT_EQ(ret, 0);
}
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
new file mode 100644
index 000000000000..710ac956bdb3
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz.c
@@ -0,0 +1,585 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Toeplitz test
+ *
+ * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
+ * 2. Compute the rx_hash in software based on the packet contents
+ * 3. Compare the two
+ *
+ * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
+ *
+ * If '-C $rx_irq_cpu_list' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the rxqueue that RSS would select based on this rx_hash
+ * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
+ * 7. Compare the cpus from 4 and 6
+ *
+ * Else if '-r $rps_bitmap' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
+ * 6. Compare the cpus from 4 and 5
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define TOEPLITZ_KEY_MIN_LEN 40
+#define TOEPLITZ_KEY_MAX_LEN 60
+
+#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
+#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
+#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
+
+#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
+
+#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */
+
+#define RPS_MAX_CPUS 16UL /* must be a power of 2 */
+
+/* configuration options (cmdline arguments) */
+static uint16_t cfg_dport = 8000;
+static int cfg_family = AF_INET6;
+static char *cfg_ifname = "eth0";
+static int cfg_num_queues;
+static int cfg_num_rps_cpus;
+static bool cfg_sink;
+static int cfg_type = SOCK_STREAM;
+static int cfg_timeout_msec = 1000;
+static bool cfg_verbose;
+
+/* global vars */
+static int num_cpus;
+static int ring_block_nr;
+static int ring_block_sz;
+
+/* stats */
+static int frames_received;
+static int frames_nohash;
+static int frames_error;
+
+#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0)
+
+/* tpacket ring */
+struct ring_state {
+ int fd;
+ char *mmap;
+ int idx;
+ int cpu;
+};
+
+static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */
+static int rps_silo_to_cpu[RPS_MAX_CPUS];
+static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static struct ring_state rings[RSS_MAX_CPUS];
+
+static inline uint32_t toeplitz(const unsigned char *four_tuple,
+ const unsigned char *key)
+{
+ int i, bit, ret = 0;
+ uint32_t key32;
+
+ key32 = ntohl(*((uint32_t *)key));
+ key += 4;
+
+ for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
+ for (bit = 7; bit >= 0; bit--) {
+ if (four_tuple[i] & (1 << bit))
+ ret ^= key32;
+
+ key32 <<= 1;
+ key32 |= !!(key[0] & (1 << bit));
+ }
+ key++;
+ }
+
+ return ret;
+}
+
+/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
+static void verify_rss(uint32_t rx_hash, int cpu)
+{
+ int queue = rx_hash % cfg_num_queues;
+
+ log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
+ if (rx_irq_cpus[queue] != cpu) {
+ log_verbose(". error: rss cpu mismatch (%d)", cpu);
+ frames_error++;
+ }
+}
+
+static void verify_rps(uint64_t rx_hash, int cpu)
+{
+ int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
+
+ log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
+ if (rps_silo_to_cpu[silo] != cpu) {
+ log_verbose(". error: rps cpu mismatch (%d)", cpu);
+ frames_error++;
+ }
+}
+
+static void log_rxhash(int cpu, uint32_t rx_hash,
+ const char *addrs, int addr_len)
+{
+ char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
+ uint16_t *ports;
+
+ if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
+ !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
+ error(1, 0, "address parse error");
+
+ ports = (void *)addrs + (addr_len * 2);
+ log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
+ cpu, rx_hash, saddr, daddr,
+ ntohs(ports[0]), ntohs(ports[1]));
+}
+
+/* Compare computed rxhash with rxhash received from tpacket_v3 */
+static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
+{
+ unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
+ uint32_t rx_hash_sw;
+ const char *addrs;
+ int addr_len;
+
+ if (cfg_family == AF_INET) {
+ addr_len = sizeof(struct in_addr);
+ addrs = pkt + offsetof(struct iphdr, saddr);
+ } else {
+ addr_len = sizeof(struct in6_addr);
+ addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
+ }
+
+ memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
+ rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
+
+ if (cfg_verbose)
+ log_rxhash(cpu, rx_hash, addrs, addr_len);
+
+ if (rx_hash != rx_hash_sw) {
+ log_verbose(" != expected 0x%x\n", rx_hash_sw);
+ frames_error++;
+ return;
+ }
+
+ log_verbose(" OK");
+ if (cfg_num_queues)
+ verify_rss(rx_hash, cpu);
+ else if (cfg_num_rps_cpus)
+ verify_rps(rx_hash, cpu);
+ log_verbose("\n");
+}
+
+static char *recv_frame(const struct ring_state *ring, char *frame)
+{
+ struct tpacket3_hdr *hdr = (void *)frame;
+
+ if (hdr->hv1.tp_rxhash)
+ verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
+ ring->cpu);
+ else
+ frames_nohash++;
+
+ return frame + hdr->tp_next_offset;
+}
+
+/* A single TPACKET_V3 block can hold multiple frames */
+static void recv_block(struct ring_state *ring)
+{
+ struct tpacket_block_desc *block;
+ char *frame;
+ int i;
+
+ block = (void *)(ring->mmap + ring->idx * ring_block_sz);
+ if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
+ return;
+
+ frame = (char *)block;
+ frame += block->hdr.bh1.offset_to_first_pkt;
+
+ for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
+ frame = recv_frame(ring, frame);
+ frames_received++;
+ }
+
+ block->hdr.bh1.block_status = TP_STATUS_KERNEL;
+ ring->idx = (ring->idx + 1) % ring_block_nr;
+}
+
+/* simple test: sleep once unconditionally and then process all rings */
+static void process_rings(void)
+{
+ int i;
+
+ usleep(1000 * cfg_timeout_msec);
+
+ for (i = 0; i < num_cpus; i++)
+ recv_block(&rings[i]);
+
+ fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
+ frames_received - frames_nohash - frames_error,
+ frames_nohash, frames_error);
+}
+
+static char *setup_ring(int fd)
+{
+ struct tpacket_req3 req3 = {0};
+ void *ring;
+
+ req3.tp_retire_blk_tov = cfg_timeout_msec;
+ req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+ req3.tp_frame_size = 2048;
+ req3.tp_frame_nr = 1 << 10;
+ req3.tp_block_nr = 2;
+
+ req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
+ req3.tp_block_size /= req3.tp_block_nr;
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
+ error(1, errno, "setsockopt PACKET_RX_RING");
+
+ ring_block_sz = req3.tp_block_size;
+ ring_block_nr = req3.tp_block_nr;
+
+ ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
+ if (ring == MAP_FAILED)
+ error(1, 0, "mmap failed");
+
+ return ring;
+}
+
+static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = sizeof(filter) / sizeof(struct sock_filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+/* filter on transport protocol and destination port */
+static void set_filter(int fd)
+{
+ const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */
+ uint8_t proto;
+
+ proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+ if (cfg_family == AF_INET)
+ __set_filter(fd, offsetof(struct iphdr, protocol), proto,
+ sizeof(struct iphdr) + off_dport);
+ else
+ __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
+ sizeof(struct ip6_hdr) + off_dport);
+}
+
+/* drop everything: used temporarily during setup */
+static void set_filter_null(int fd)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = sizeof(filter) / sizeof(struct sock_filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+static int create_ring(char **ring)
+{
+ struct fanout_args args = {
+ .id = 1,
+ .type_flags = PACKET_FANOUT_CPU,
+ .max_num_members = RSS_MAX_CPUS
+ };
+ struct sockaddr_ll ll = { 0 };
+ int fd, val;
+
+ fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket creation failed");
+
+ val = TPACKET_V3;
+ if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
+ error(1, errno, "setsockopt PACKET_VERSION");
+ *ring = setup_ring(fd);
+
+ /* block packets until all rings are added to the fanout group:
+ * else packets can arrive during setup and get misclassified
+ */
+ set_filter_null(fd);
+
+ ll.sll_family = AF_PACKET;
+ ll.sll_ifindex = if_nametoindex(cfg_ifname);
+ ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
+ htons(ETH_P_IPV6);
+ if (bind(fd, (void *)&ll, sizeof(ll)))
+ error(1, errno, "bind");
+
+ /* must come after bind: verifies all programs in group match */
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
+ /* on failure, retry using old API if that is sufficient:
+ * it has a hard limit of 256 sockets, so only try if
+ * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
+ * in this API, the third argument is left implicit.
+ */
+ if (cfg_num_queues || num_cpus > 256 ||
+ setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+ &args, sizeof(uint32_t)))
+ error(1, errno, "setsockopt PACKET_FANOUT cpu");
+ }
+
+ return fd;
+}
+
+/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
+static int setup_sink(void)
+{
+ int fd, val;
+
+ fd = socket(cfg_family, cfg_type, 0);
+ if (fd == -1)
+ error(1, errno, "socket %d.%d", cfg_family, cfg_type);
+
+ val = 1 << 20;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
+ error(1, errno, "setsockopt rcvbuf");
+
+ return fd;
+}
+
+static void setup_rings(void)
+{
+ int i;
+
+ for (i = 0; i < num_cpus; i++) {
+ rings[i].cpu = i;
+ rings[i].fd = create_ring(&rings[i].mmap);
+ }
+
+ /* accept packets once all rings in the fanout group are up */
+ for (i = 0; i < num_cpus; i++)
+ set_filter(rings[i].fd);
+}
+
+static void cleanup_rings(void)
+{
+ int i;
+
+ for (i = 0; i < num_cpus; i++) {
+ if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
+ error(1, errno, "munmap");
+ if (close(rings[i].fd))
+ error(1, errno, "close");
+ }
+}
+
+static void parse_cpulist(const char *arg)
+{
+ do {
+ rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
+
+ arg = strchr(arg, ',');
+ if (!arg)
+ break;
+ arg++; // skip ','
+ } while (1);
+}
+
+static void show_cpulist(void)
+{
+ int i;
+
+ for (i = 0; i < cfg_num_queues; i++)
+ fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
+}
+
+static void show_silos(void)
+{
+ int i;
+
+ for (i = 0; i < cfg_num_rps_cpus; i++)
+ fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
+}
+
+static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
+{
+ int i, ret, off;
+
+ if (slen < TOEPLITZ_STR_MIN_LEN ||
+ slen > TOEPLITZ_STR_MAX_LEN + 1)
+ error(1, 0, "invalid toeplitz key");
+
+ for (i = 0, off = 0; off < slen; i++, off += 3) {
+ ret = sscanf(str + off, "%hhx", &key[i]);
+ if (ret != 1)
+ error(1, 0, "key parse error at %d off %d len %d",
+ i, off, slen);
+ }
+}
+
+static void parse_rps_bitmap(const char *arg)
+{
+ unsigned long bitmap;
+ int i;
+
+ bitmap = strtoul(arg, NULL, 0);
+
+ if (bitmap & ~(RPS_MAX_CPUS - 1))
+ error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
+ bitmap, RPS_MAX_CPUS - 1);
+
+ for (i = 0; i < RPS_MAX_CPUS; i++)
+ if (bitmap & 1UL << i)
+ rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ static struct option long_options[] = {
+ {"dport", required_argument, 0, 'd'},
+ {"cpus", required_argument, 0, 'C'},
+ {"key", required_argument, 0, 'k'},
+ {"iface", required_argument, 0, 'i'},
+ {"ipv4", no_argument, 0, '4'},
+ {"ipv6", no_argument, 0, '6'},
+ {"sink", no_argument, 0, 's'},
+ {"tcp", no_argument, 0, 't'},
+ {"timeout", required_argument, 0, 'T'},
+ {"udp", no_argument, 0, 'u'},
+ {"verbose", no_argument, 0, 'v'},
+ {"rps", required_argument, 0, 'r'},
+ {0, 0, 0, 0}
+ };
+ bool have_toeplitz = false;
+ int index, c;
+
+ while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:u:v", long_options, &index)) != -1) {
+ switch (c) {
+ case '4':
+ cfg_family = AF_INET;
+ break;
+ case '6':
+ cfg_family = AF_INET6;
+ break;
+ case 'C':
+ parse_cpulist(optarg);
+ break;
+ case 'd':
+ cfg_dport = strtol(optarg, NULL, 0);
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ case 'k':
+ parse_toeplitz_key(optarg, strlen(optarg),
+ toeplitz_key);
+ have_toeplitz = true;
+ break;
+ case 'r':
+ parse_rps_bitmap(optarg);
+ break;
+ case 's':
+ cfg_sink = true;
+ break;
+ case 't':
+ cfg_type = SOCK_STREAM;
+ break;
+ case 'T':
+ cfg_timeout_msec = strtol(optarg, NULL, 0);
+ break;
+ case 'u':
+ cfg_type = SOCK_DGRAM;
+ break;
+ case 'v':
+ cfg_verbose = true;
+ break;
+
+ default:
+ error(1, 0, "unknown option %c", optopt);
+ break;
+ }
+ }
+
+ if (!have_toeplitz)
+ error(1, 0, "Must supply rss key ('-k')");
+
+ num_cpus = get_nprocs();
+ if (num_cpus > RSS_MAX_CPUS)
+ error(1, 0, "increase RSS_MAX_CPUS");
+
+ if (cfg_num_queues && cfg_num_rps_cpus)
+ error(1, 0,
+ "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
+ if (cfg_verbose) {
+ show_cpulist();
+ show_silos();
+ }
+}
+
+int main(int argc, char **argv)
+{
+ const int min_tests = 10;
+ int fd_sink = -1;
+
+ parse_opts(argc, argv);
+
+ if (cfg_sink)
+ fd_sink = setup_sink();
+
+ setup_rings();
+ process_rings();
+ cleanup_rings();
+
+ if (cfg_sink && close(fd_sink))
+ error(1, errno, "close sink");
+
+ if (frames_received - frames_nohash < min_tests)
+ error(1, 0, "too few frames for verification");
+
+ return frames_error;
+}
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
new file mode 100755
index 000000000000..0a49907cd4fe
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz.sh
@@ -0,0 +1,199 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
+# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
+# ('-rps <rps_map>')
+#
+# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action,
+# which is a driver-specific encoding.
+#
+# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
+# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
+
+source setup_loopback.sh
+readonly SERVER_IP4="192.168.1.200/24"
+readonly SERVER_IP6="fda8::1/64"
+readonly SERVER_MAC="aa:00:00:00:00:02"
+
+readonly CLIENT_IP4="192.168.1.100/24"
+readonly CLIENT_IP6="fda8::2/64"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+PORT=8000
+KEY="$(</proc/sys/net/core/netdev_rss_key)"
+TEST_RSS=false
+RPS_MAP=""
+PROTO_FLAG=""
+IP_FLAG=""
+DEV="eth0"
+
+# Return the number of rxqs among which RSS is configured to spread packets.
+# This is determined by reading the RSS indirection table using ethtool.
+get_rss_cfg_num_rxqs() {
+ echo $(ethtool -x "${DEV}" |
+ egrep [[:space:]]+[0-9]+:[[:space:]]+ |
+ cut -d: -f2- |
+ awk '{$1=$1};1' |
+ tr ' ' '\n' |
+ sort -u |
+ wc -l)
+}
+
+# Return a list of the receive irq handler cpus.
+# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
+# Reads /sys/kernel/irq/ in order, so algorithm depends on
+# irq_{rxq-0} < irq_{rxq-1}, etc.
+get_rx_irq_cpus() {
+ CPUS=""
+ # sort so that irq 2 is read before irq 10
+ SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
+ # Consider only as many queues as RSS actually uses. We assume that
+ # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
+ RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
+ RXQ_COUNT=0
+
+ for i in ${SORTED_IRQS}
+ do
+ [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break
+ # lookup relevant IRQs by action name
+ [[ -e "$i/actions" ]] || continue
+ cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue
+ irqname=$(<"$i/actions")
+
+ # does the IRQ get called
+ irqcount=$(cat "$i/per_cpu_count" | tr -d '0,')
+ [[ -n "${irqcount}" ]] || continue
+
+ # lookup CPU
+ irq=$(basename "$i")
+ cpu=$(cat "/proc/irq/$irq/smp_affinity_list")
+
+ if [[ -z "${CPUS}" ]]; then
+ CPUS="${cpu}"
+ else
+ CPUS="${CPUS},${cpu}"
+ fi
+ RXQ_COUNT=$((RXQ_COUNT+1))
+ done
+
+ echo "${CPUS}"
+}
+
+get_disable_rfs_cmd() {
+ echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
+}
+
+get_set_rps_bitmaps_cmd() {
+ CMD=""
+ for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
+ do
+ CMD="${CMD} echo $1 > ${i};"
+ done
+
+ echo "${CMD}"
+}
+
+get_disable_rps_cmd() {
+ echo "$(get_set_rps_bitmaps_cmd 0)"
+}
+
+die() {
+ echo "$1"
+ exit 1
+}
+
+check_nic_rxhash_enabled() {
+ local -r pattern="receive-hashing:\ on"
+
+ ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
+}
+
+parse_opts() {
+ local prog=$0
+ shift 1
+
+ while [[ "$1" =~ "-" ]]; do
+ if [[ "$1" = "-irq_prefix" ]]; then
+ shift
+ IRQ_PATTERN="^$1-[0-9]*$"
+ elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
+ PROTO_FLAG="$1"
+ elif [[ "$1" = "-4" ]]; then
+ IP_FLAG="$1"
+ SERVER_IP="${SERVER_IP4}"
+ CLIENT_IP="${CLIENT_IP4}"
+ elif [[ "$1" = "-6" ]]; then
+ IP_FLAG="$1"
+ SERVER_IP="${SERVER_IP6}"
+ CLIENT_IP="${CLIENT_IP6}"
+ elif [[ "$1" = "-rss" ]]; then
+ TEST_RSS=true
+ elif [[ "$1" = "-rps" ]]; then
+ shift
+ RPS_MAP="$1"
+ elif [[ "$1" = "-i" ]]; then
+ shift
+ DEV="$1"
+ else
+ die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
+ [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
+ fi
+ shift
+ done
+}
+
+setup() {
+ setup_loopback_environment "${DEV}"
+
+ # Set up server_ns namespace and client_ns namespace
+ setup_macvlan_ns "${DEV}" server_ns server \
+ "${SERVER_MAC}" "${SERVER_IP}"
+ setup_macvlan_ns "${DEV}" client_ns client \
+ "${CLIENT_MAC}" "${CLIENT_IP}"
+}
+
+cleanup() {
+ cleanup_macvlan_ns server_ns server client_ns client
+ cleanup_loopback "${DEV}"
+}
+
+parse_opts $0 $@
+
+setup
+trap cleanup EXIT
+
+check_nic_rxhash_enabled
+
+# Actual test starts here
+if [[ "${TEST_RSS}" = true ]]; then
+ # RPS/RFS must be disabled because they move packets between cpus,
+ # which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
+ eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
+ ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+ -C "$(get_rx_irq_cpus)" -s -v &
+elif [[ ! -z "${RPS_MAP}" ]]; then
+ eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
+ ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+ -r "0x${RPS_MAP}" -s -v &
+else
+ ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
+fi
+
+server_pid=$!
+
+ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
+ "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
+
+client_pid=$!
+
+wait "${server_pid}"
+exit_code=$?
+kill -9 "${client_pid}"
+if [[ "${exit_code}" -eq 0 ]]; then
+ echo "Test Succeeded!"
+fi
+exit "${exit_code}"
diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh
new file mode 100755
index 000000000000..2fef34f4aba1
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz_client.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A simple program for generating traffic for the toeplitz test.
+#
+# This program sends packets periodically for, conservatively, 20 seconds. The
+# intent is for the calling program to kill this program once it is no longer
+# needed, rather than waiting for the 20 second expiration.
+
+send_traffic() {
+ expiration=$((SECONDS+20))
+ while [[ "${SECONDS}" -lt "${expiration}" ]]
+ do
+ if [[ "${PROTO}" == "-u" ]]; then
+ echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
+ else
+ echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
+ fi
+ sleep 0.001
+ done
+}
+
+PROTO=$1
+IPVER=$2
+ADDR=$3
+PORT=$4
+
+send_traffic
diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
index dbf0421986df..2d10ccac898a 100755
--- a/tools/testing/selftests/net/unicast_extensions.sh
+++ b/tools/testing/selftests/net/unicast_extensions.sh
@@ -28,12 +28,15 @@
# These tests provide an easy way to flip the expected result of any
# of these behaviors for testing kernel patches that change them.
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
# nettest can be run from PATH or from same directory as this selftest
if ! which nettest >/dev/null; then
PATH=$PWD:$PATH
if ! which nettest >/dev/null; then
echo "'nettest' command not found; skipping tests"
- exit 0
+ exit $ksft_skip
fi
fi
@@ -189,6 +192,15 @@ segmenttest 255.255.255.1 255.255.255.254 24 "assign and ping inside 255.255.255
route_test 240.5.6.7 240.5.6.1 255.1.2.1 255.1.2.3 24 "route between 240.5.6/24 and 255.1.2/24 (is allowed)"
route_test 0.200.6.7 0.200.38.1 245.99.101.1 245.99.200.111 16 "route between 0.200/16 and 245.99/16 (is allowed)"
#
+# Test support for lowest address ending in .0
+segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (/24)"
+#
+# Test support for lowest address not ending in .0
+segmenttest 192.168.101.192 192.168.101.193 26 "assign and ping lowest address (/26)"
+#
+# Routing using lowest address as a gateway/endpoint
+route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address"
+#
# ==============================================
# ==== TESTS THAT CURRENTLY EXPECT FAILURE =====
# ==============================================
@@ -202,14 +214,6 @@ segmenttest 255.255.255.1 255.255.255.255 16 "assigning 255.255.255.255 (is forb
# Currently Linux does not allow this, so this should fail too
segmenttest 127.99.4.5 127.99.4.6 16 "assign and ping inside 127/8 (is forbidden)"
#
-# Test support for lowest address
-# Currently Linux does not allow this, so this should fail too
-segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (is forbidden)"
-#
-# Routing using lowest address as a gateway/endpoint
-# Currently Linux does not allow this, so this should fail too
-route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address (is forbidden)"
-#
# Test support for unicast use of class D
# Currently Linux does not allow this, so this should fail too
segmenttest 225.1.2.3 225.1.2.200 24 "assign and ping class D address (is forbidden)"
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 11d7cdb898c0..19eac3e44c06 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -13,7 +13,7 @@ readonly NS_DST=$BASE$DST
readonly BM_NET_V4=192.168.1.
readonly BM_NET_V6=2001:db8::
-readonly NPROCS=`nproc`
+readonly CPUS=`nproc`
ret=0
cleanup() {
@@ -75,6 +75,29 @@ chk_tso_flag() {
__chk_flag "$1" $2 $3 tcp-segmentation-offload
}
+chk_channels() {
+ local msg="$1"
+ local target=$2
+ local rx=$3
+ local tx=$4
+
+ local dev=veth$target
+
+ local cur_rx=`ip netns exec $BASE$target ethtool -l $dev |\
+ grep RX: | tail -n 1 | awk '{print $2}' `
+ local cur_tx=`ip netns exec $BASE$target ethtool -l $dev |\
+ grep TX: | tail -n 1 | awk '{print $2}'`
+ local cur_combined=`ip netns exec $BASE$target ethtool -l $dev |\
+ grep Combined: | tail -n 1 | awk '{print $2}'`
+
+ printf "%-60s" "$msg"
+ if [ "$cur_rx" = "$rx" -a "$cur_tx" = "$tx" -a "$cur_combined" = "n/a" ]; then
+ echo " ok "
+ else
+ echo " fail rx:$rx:$cur_rx tx:$tx:$cur_tx combined:n/a:$cur_combined"
+ fi
+}
+
chk_gro() {
local msg="$1"
local expected=$2
@@ -107,11 +130,100 @@ chk_gro() {
fi
}
+__change_channels()
+{
+ local cur_cpu
+ local end=$1
+ local cur
+ local i
+
+ while true; do
+ printf -v cur '%(%s)T'
+ [ $cur -le $end ] || break
+
+ for i in `seq 1 $CPUS`; do
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx $i tx $i
+ ip netns exec $NS_DST ethtool -L veth$DST rx $i tx $i
+ done
+
+ for i in `seq 1 $((CPUS - 1))`; do
+ cur_cpu=$((CPUS - $i))
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx $cur_cpu tx $cur_cpu
+ ip netns exec $NS_DST ethtool -L veth$DST rx $cur_cpu tx $cur_cpu
+ done
+ done
+}
+
+__send_data() {
+ local end=$1
+
+ while true; do
+ printf -v cur '%(%s)T'
+ [ $cur -le $end ] || break
+
+ ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 1000 -M 300 -D $BM_NET_V4$DST
+ done
+}
+
+do_stress() {
+ local end
+ printf -v end '%(%s)T'
+ end=$((end + $STRESS))
+
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 3 tx 3
+ ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+
+ ip netns exec $NS_DST ./udpgso_bench_rx &
+ local rx_pid=$!
+
+ echo "Running stress test for $STRESS seconds..."
+ __change_channels $end &
+ local ch_pid=$!
+ __send_data $end &
+ local data_pid_1=$!
+ __send_data $end &
+ local data_pid_2=$!
+ __send_data $end &
+ local data_pid_3=$!
+ __send_data $end &
+ local data_pid_4=$!
+
+ wait $ch_pid $data_pid_1 $data_pid_2 $data_pid_3 $data_pid_4
+ kill -9 $rx_pid
+ echo "done"
+
+ # restore previous setting
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 tx 2
+ ip netns exec $NS_DST ethtool -L veth$DST rx 2 tx 1
+}
+
+usage() {
+ echo "Usage: $0 [-h] [-s <seconds>]"
+ echo -e "\t-h: show this help"
+ echo -e "\t-s: run optional stress tests for the given amount of seconds"
+}
+
+STRESS=0
+while getopts "hs:" option; do
+ case "$option" in
+ "h")
+ usage $0
+ exit 0
+ ;;
+ "s")
+ STRESS=$OPTARG
+ ;;
+ esac
+done
+
if [ ! -f ../bpf/xdp_dummy.o ]; then
echo "Missing xdp_dummy helper. Build bpf selftest first"
exit 1
fi
+[ $CPUS -lt 2 ] && echo "Only one CPU available, some tests will be skipped"
+[ $STRESS -gt 0 -a $CPUS -lt 3 ] && echo " stress test will be skipped, too"
+
create_ns
chk_gro_flag "default - gro flag" $SRC off
chk_gro_flag " - peer gro flag" $DST off
@@ -134,6 +246,8 @@ chk_gro " - aggregation with TSO off" 1
cleanup
create_ns
+chk_channels "default channels" $DST 1 1
+
ip -n $NS_DST link set dev veth$DST down
ip netns exec $NS_DST ethtool -K veth$DST gro on
chk_gro_flag "with gro enabled on link down - gro flag" $DST on
@@ -147,6 +261,56 @@ chk_gro " - aggregation with TSO off" 1
cleanup
create_ns
+
+CUR_TX=1
+CUR_RX=1
+if [ $CPUS -gt 1 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST tx 2
+ chk_channels "setting tx channels" $DST 1 2
+ CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+ chk_channels "setting both rx and tx channels" $DST 3 3
+ CUR_RX=3
+ CUR_TX=3
+fi
+
+ip netns exec $NS_DST ethtool -L veth$DST combined 2 2>/dev/null
+chk_channels "bad setting: combined channels" $DST $CUR_RX $CUR_TX
+
+ip netns exec $NS_DST ethtool -L veth$DST tx $((CPUS + 1)) 2>/dev/null
+chk_channels "setting invalid channels nr" $DST $CUR_RX $CUR_TX
+
+if [ $CPUS -gt 1 ]; then
+ # this also tests queues nr reduction
+ ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null
+ printf "%-60s" "bad setting: XDP with RX nr less than TX"
+ ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+ section xdp_dummy 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+
+ # the following tests will run with multiple channels active
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 2
+ ip netns exec $NS_DST ethtool -L veth$DST rx 2
+ ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
+ section xdp_dummy 2>/dev/null
+ printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set"
+ ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+ CUR_RX=2
+ CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+ printf "%-60s" "bad setting: increasing peer TX nr above RX with XDP set"
+ ip netns exec $NS_SRC ethtool -L veth$SRC tx 3 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+ chk_channels "setting invalid channels nr" $DST 2 2
+fi
+
ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
chk_gro_flag "with xdp attached - gro flag" $DST on
chk_gro_flag " - peer gro flag" $SRC off
@@ -167,10 +331,27 @@ chk_gro_flag " - after gro on xdp off, gro flag" $DST on
chk_gro_flag " - peer gro flag" $SRC off
chk_tso_flag " - tso flag" $SRC on
chk_tso_flag " - peer tso flag" $DST on
+
+if [ $CPUS -gt 1 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST tx 1
+ chk_channels "decreasing tx channels with device down" $DST 2 1
+fi
+
ip -n $NS_DST link set dev veth$DST up
ip -n $NS_SRC link set dev veth$SRC up
chk_gro " - aggregation" 1
+if [ $CPUS -gt 1 ]; then
+ [ $STRESS -gt 0 -a $CPUS -gt 2 ] && do_stress
+
+ ip -n $NS_DST link set dev veth$DST down
+ ip -n $NS_SRC link set dev veth$SRC down
+ ip netns exec $NS_DST ethtool -L veth$DST tx 2
+ chk_channels "increasing tx channels with device down" $DST 2 2
+ ip -n $NS_DST link set dev veth$DST up
+ ip -n $NS_SRC link set dev veth$SRC up
+fi
+
ip netns exec $NS_DST ethtool -K veth$DST gro off
ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
chk_gro "aggregation again with default and TSO off" 10
diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
index 18b982d611de..865d53c1781c 100755
--- a/tools/testing/selftests/net/vrf_strict_mode_test.sh
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -3,6 +3,9 @@
# This test is designed for testing the new VRF strict_mode functionality.
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
ret=0
# identifies the "init" network namespace which is often called root network
@@ -371,18 +374,18 @@ vrf_strict_mode_check_support()
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
modprobe vrf &>/dev/null
if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
echo "SKIP: vrf sysctl does not exist"
- exit 0
+ exit $ksft_skip
fi
cleanup &> /dev/null
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index cd6430b39982..8748199ac109 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -5,7 +5,7 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
- ipip-conntrack-mtu.sh
+ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh
LDLIBS = -lmnl
TEST_GEN_FILES = nf-queue
diff --git a/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh
new file mode 100755
index 000000000000..e7d7bf13cff5
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that UNREPLIED tcp conntrack will eventually timeout.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+waittime=20
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+cleanup() {
+ ip netns pids $ns1 | xargs kill 2>/dev/null
+ ip netns pids $ns2 | xargs kill 2>/dev/null
+
+ ip netns del $ns1
+ ip netns del $ns2
+}
+
+ipv4() {
+ echo -n 192.168.$1.2
+}
+
+check_counter()
+{
+ ns=$1
+ name=$2
+ expect=$3
+ local lret=0
+
+ cnt=$(ip netns exec $ns2 nft list counter inet filter "$name" | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2
+ ip netns exec $ns2 nft list counter inet filter "$name" 1>&2
+ lret=1
+ fi
+
+ return $lret
+}
+
+# Create test namespaces
+ip netns add $ns1 || exit 1
+
+trap cleanup EXIT
+
+ip netns add $ns2 || exit 1
+
+# Connect the namespace to the host using a veth pair
+ip -net $ns1 link add name veth1 type veth peer name veth2
+ip -net $ns1 link set netns $ns2 dev veth2
+
+ip -net $ns1 link set up dev lo
+ip -net $ns2 link set up dev lo
+ip -net $ns1 link set up dev veth1
+ip -net $ns2 link set up dev veth2
+
+ip -net $ns2 addr add 10.11.11.2/24 dev veth2
+ip -net $ns2 route add default via 10.11.11.1
+
+ip netns exec $ns2 sysctl -q net.ipv4.conf.veth2.forwarding=1
+
+# add a rule inside NS so we enable conntrack
+ip netns exec $ns1 iptables -A INPUT -m state --state established,related -j ACCEPT
+
+ip -net $ns1 addr add 10.11.11.1/24 dev veth1
+ip -net $ns1 route add 10.99.99.99 via 10.11.11.2
+
+# Check connectivity works
+ip netns exec $ns1 ping -q -c 2 10.11.11.2 >/dev/null || exit 1
+
+ip netns exec $ns2 nc -l -p 8080 < /dev/null &
+
+# however, conntrack entries are there
+
+ip netns exec $ns2 nft -f - <<EOF
+table inet filter {
+ counter connreq { }
+ counter redir { }
+ chain input {
+ type filter hook input priority 0; policy accept;
+ ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept
+ ct state new ct status dnat tcp dport 8080 counter name "redir" accept
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "ERROR: Could not load nft rules"
+ exit 1
+fi
+
+ip netns exec $ns2 sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10
+
+echo "INFO: connect $ns1 -> $ns2 to the virtual ip"
+ip netns exec $ns1 bash -c 'while true ; do
+ nc -p 60000 10.99.99.99 80
+ sleep 1
+ done' &
+
+sleep 1
+
+ip netns exec $ns2 nft -f - <<EOF
+table inet nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ ip daddr 10.99.99.99 tcp dport 80 redirect to :8080
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "ERROR: Could not load nat redirect"
+ exit 1
+fi
+
+count=$(ip netns exec $ns2 conntrack -L -p tcp --dport 80 2>/dev/null | wc -l)
+if [ $count -eq 0 ]; then
+ echo "ERROR: $ns2 did not pick up tcp connection from peer"
+ exit 1
+fi
+
+echo "INFO: NAT redirect added in ns $ns2, waiting for $waittime seconds for nat to take effect"
+for i in $(seq 1 $waittime); do
+ echo -n "."
+
+ sleep 1
+
+ count=$(ip netns exec $ns2 conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l)
+ if [ $count -gt 0 ]; then
+ echo
+ echo "PASS: redirection took effect after $i seconds"
+ break
+ fi
+
+ m=$((i%20))
+ if [ $m -eq 0 ]; then
+ echo " waited for $i seconds"
+ fi
+done
+
+expect="packets 1 bytes 60"
+check_counter "$ns2" "redir" "$expect"
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+if [ $ret -eq 0 ];then
+ echo "PASS: redirection counter has expected values"
+else
+ echo "ERROR: no tcp connection was redirected"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index 427d94816f2d..d4ffebb989f8 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -199,7 +199,6 @@ fi
# test basic connectivity
if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
echo "ERROR: ns1 cannot reach ns2" 1>&2
- bash
exit 1
fi
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index d7e07f4c3d7f..da1c1e4b6c86 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -741,6 +741,149 @@ EOF
return $lret
}
+# test port shadowing.
+# create two listening services, one on router (ns0), one
+# on client (ns2), which is masqueraded from ns1 point of view.
+# ns2 sends udp packet coming from service port to ns1, on a highport.
+# Later, if n1 uses same highport to connect to ns0:service, packet
+# might be port-forwarded to ns2 instead.
+
+# second argument tells if we expect the 'fake-entry' to take effect
+# (CLIENT) or not (ROUTER).
+test_port_shadow()
+{
+ local test=$1
+ local expect=$2
+ local daddrc="10.0.1.99"
+ local daddrs="10.0.1.1"
+ local result=""
+ local logmsg=""
+
+ echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
+ nc_r=$!
+
+ echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 &
+ nc_c=$!
+
+ # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
+ echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null
+
+ # ns1 tries to connect to ns0:1405. With default settings this should connect
+ # to client, it matches the conntrack entry created above.
+
+ result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405)
+
+ if [ "$result" = "$expect" ] ;then
+ echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
+ else
+ echo "ERROR: portshadow test $test: got reply from \"$result\", not $expect as intended"
+ ret=1
+ fi
+
+ kill $nc_r $nc_c 2>/dev/null
+
+ # flush udp entries for next test round, if any
+ ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
+}
+
+# This prevents port shadow of router service via packet filter,
+# packets claiming to originate from service port from internal
+# network are dropped.
+test_port_shadow_filter()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ meta iif veth1 udp sport 1405 drop
+ }
+}
+EOF
+ test_port_shadow "port-filter" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table $family filter
+}
+
+# This prevents port shadow of router service via notrack.
+test_port_shadow_notrack()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family raw {
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+ meta iif veth0 udp dport 1405 notrack
+ udp dport 1405 notrack
+ }
+ chain output {
+ type filter hook output priority -300; policy accept;
+ udp sport 1405 notrack
+ }
+}
+EOF
+ test_port_shadow "port-notrack" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table $family raw
+}
+
+# This prevents port shadow of router service via sport remap.
+test_port_shadow_pat()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family pat {
+ chain postrouting {
+ type nat hook postrouting priority -1; policy accept;
+ meta iif veth1 udp sport <= 1405 masquerade to : 1406-65535 random
+ }
+}
+EOF
+ test_port_shadow "pat" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table $family pat
+}
+
+test_port_shadowing()
+{
+ local family="ip"
+
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oif veth0 masquerade
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family masquerade hook"
+ return $ksft_skip
+ fi
+
+ # test default behaviour. Packet from ns1 to ns0 is redirected to ns2.
+ test_port_shadow "default" "CLIENT"
+
+ # test packet filter based mitigation: prevent forwarding of
+ # packets claiming to come from the service port.
+ test_port_shadow_filter "$family"
+
+ # test conntrack based mitigation: connections going or coming
+ # from router:service bypass connection tracking.
+ test_port_shadow_notrack "$family"
+
+ # test nat based mitigation: fowarded packets coming from service port
+ # are masqueraded with random highport.
+ test_port_shadow_pat "$family"
+
+ ip netns exec "$ns0" nft delete table $family nat
+}
# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
for i in 0 1 2; do
@@ -861,6 +1004,8 @@ reset_counters
$test_inet_nat && test_redirect inet
$test_inet_nat && test_redirect6 inet
+test_port_shadowing
+
if [ $ret -ne 0 ];then
echo -n "FAIL: "
nft --version
diff --git a/tools/testing/selftests/netfilter/nft_nat_zones.sh b/tools/testing/selftests/netfilter/nft_nat_zones.sh
new file mode 100755
index 000000000000..b9ab37380f33
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_nat_zones.sh
@@ -0,0 +1,309 @@
+#!/bin/bash
+#
+# Test connection tracking zone and NAT source port reallocation support.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Don't increase too much, 2000 clients should work
+# just fine but script can then take several minutes with
+# KASAN/debug builds.
+maxclients=100
+
+have_iperf=1
+ret=0
+
+# client1---.
+# veth1-.
+# |
+# NAT Gateway --veth0--> Server
+# | |
+# veth2-' |
+# client2---' |
+# .... |
+# clientX----vethX---'
+
+# All clients share identical IP address.
+# NAT Gateway uses policy routing and conntrack zones to isolate client
+# namespaces. Each client connects to Server, each with colliding tuples:
+# clientsaddr:10000 -> serveraddr:dport
+# NAT Gateway is supposed to do port reallocation for each of the
+# connections.
+
+sfx=$(mktemp -u "XXXXXXXX")
+gw="ns-gw-$sfx"
+cl1="ns-cl1-$sfx"
+cl2="ns-cl2-$sfx"
+srv="ns-srv-$sfx"
+
+v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
+v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
+v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
+v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null)
+v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null)
+v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)
+
+cleanup()
+{
+ ip netns del $gw
+ ip netns del $srv
+ for i in $(seq 1 $maxclients); do
+ ip netns del ns-cl$i-$sfx 2>/dev/null
+ done
+
+ sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+conntrack -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without conntrack tool"
+ exit $ksft_skip
+fi
+
+iperf3 -v >/dev/null 2>&1
+if [ $? -ne 0 ];then
+ have_iperf=0
+fi
+
+ip netns add "$gw"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $gw"
+ exit $ksft_skip
+fi
+ip -net "$gw" link set lo up
+
+trap cleanup EXIT
+
+ip netns add "$srv"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create server netns $srv"
+ exit $ksft_skip
+fi
+
+ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
+ip -net "$gw" link set veth0 up
+ip -net "$srv" link set lo up
+ip -net "$srv" link set eth0 up
+
+sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null
+
+for i in $(seq 1 $maxclients);do
+ cl="ns-cl$i-$sfx"
+
+ ip netns add "$cl"
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not create client netns $cl"
+ exit $ksft_skip
+ fi
+ ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+ fi
+done
+
+for i in $(seq 1 $maxclients);do
+ cl="ns-cl$i-$sfx"
+ echo netns exec "$cl" ip link set lo up
+ echo netns exec "$cl" ip link set eth0 up
+ echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
+ echo netns exec "$gw" ip link set veth$i up
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0
+
+ # clients have same IP addresses.
+ echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
+ echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0
+ echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
+ echo netns exec "$cl" ip route add default via dead:1::2 dev eth0
+
+ # NB: same addresses on client-facing interfaces.
+ echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i
+ echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i
+
+ # gw: policy routing
+ echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i))
+ echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i))
+ echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i))
+done | ip -batch /dev/stdin
+
+ip -net "$gw" addr add 10.3.0.1/24 dev veth0
+ip -net "$gw" addr add dead:3::1/64 dev veth0
+
+ip -net "$srv" addr add 10.3.0.99/24 dev eth0
+ip -net "$srv" addr add dead:3::99/64 dev eth0
+
+ip netns exec $gw nft -f /dev/stdin<<EOF
+table inet raw {
+ map iiftomark {
+ type ifname : mark
+ }
+
+ map iiftozone {
+ typeof iifname : ct zone
+ }
+
+ set inicmp {
+ flags dynamic
+ type ipv4_addr . ifname . ipv4_addr
+ }
+ set inflows {
+ flags dynamic
+ type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service
+ }
+
+ set inflows6 {
+ flags dynamic
+ type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority -64000; policy accept;
+ ct original zone set meta iifname map @iiftozone
+ meta mark set meta iifname map @iiftomark
+
+ tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter }
+ add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter }
+ ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter }
+ }
+
+ chain nat_postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ct mark set meta mark meta oifname veth0 masquerade
+ }
+
+ chain mangle_prerouting {
+ type filter hook prerouting priority -100; policy accept;
+ ct direction reply meta mark set ct mark
+ }
+}
+EOF
+
+( echo add element inet raw iiftomark \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth$i\" : $i,
+ done
+ echo \"veth$maxclients\" : $maxclients \}
+ echo add element inet raw iiftozone \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth$i\" : $i,
+ done
+ echo \"veth$maxclients\" : $maxclients \}
+) | ip netns exec $gw nft -f /dev/stdin
+
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
+
+# useful for debugging: allows to use 'ping' from clients to gateway.
+ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null
+
+for i in $(seq 1 $maxclients); do
+ cl="ns-cl$i-$sfx"
+ ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
+ if [ $? -ne 0 ]; then
+ echo FAIL: Ping failure from $cl 1>&2
+ ret=1
+ break
+ fi
+done
+
+wait
+
+for i in $(seq 1 $maxclients); do
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"
+ if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth$i" 1>&2
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
+ break
+ fi
+done
+
+ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2
+fi
+
+if [ $ret -eq 0 ]; then
+ echo "PASS: ping test from all $maxclients namespaces"
+fi
+
+if [ $have_iperf -eq 0 ];then
+ echo "SKIP: iperf3 not installed"
+ if [ $ret -ne 0 ];then
+ exit $ret
+ fi
+ exit $ksft_skip
+fi
+
+ip netns exec $srv iperf3 -s > /dev/null 2>&1 &
+iperfpid=$!
+sleep 1
+
+for i in $(seq 1 $maxclients); do
+ if [ $ret -ne 0 ]; then
+ break
+ fi
+ cl="ns-cl$i-$sfx"
+ ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null
+ if [ $? -ne 0 ]; then
+ echo FAIL: Failure to connect for $cl 1>&2
+ ip netns exec $gw conntrack -S 1>&2
+ ret=1
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: iperf3 connections for all $maxclients net namespaces"
+fi
+
+kill $iperfpid
+wait
+
+for i in $(seq 1 $maxclients); do
+ ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null
+ if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
+ break
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: Found client connection for all $maxclients net namespaces"
+fi
+
+ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null
+if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: cannot find return entry on veth0" 1>&2
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh
new file mode 100755
index 000000000000..ac646376eb01
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_zones_many.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+
+# Test insertion speed for packets with identical addresses/ports
+# that are all placed in distinct conntrack zones.
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns="ns-$sfx"
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+zones=20000
+have_ct_tool=0
+ret=0
+
+cleanup()
+{
+ ip netns del $ns
+}
+
+ip netns add $ns
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $gw"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+conntrack -V > /dev/null 2>&1
+if [ $? -eq 0 ];then
+ have_ct_tool=1
+fi
+
+ip -net "$ns" link set lo up
+
+test_zones() {
+ local max_zones=$1
+
+ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
+ip netns exec $ns nft -f /dev/stdin<<EOF
+flush ruleset
+table inet raw {
+ map rndzone {
+ typeof numgen inc mod $max_zones : ct zone
+ }
+
+ chain output {
+ type filter hook output priority -64000; policy accept;
+ udp dport 12345 ct zone set numgen inc mod 65536 map @rndzone
+ }
+}
+EOF
+ (
+ echo "add element inet raw rndzone {"
+ for i in $(seq 1 $max_zones);do
+ echo -n "$i : $i"
+ if [ $i -lt $max_zones ]; then
+ echo ","
+ else
+ echo "}"
+ fi
+ done
+ ) | ip netns exec $ns nft -f /dev/stdin
+
+ local i=0
+ local j=0
+ local outerstart=$(date +%s%3N)
+ local stop=$outerstart
+
+ while [ $i -lt $max_zones ]; do
+ local start=$(date +%s%3N)
+ i=$((i + 10000))
+ j=$((j + 1))
+ dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null
+ if [ $? -ne 0 ] ;then
+ ret=1
+ break
+ fi
+
+ stop=$(date +%s%3N)
+ local duration=$((stop-start))
+ echo "PASS: added 10000 entries in $duration ms (now $i total, loop $j)"
+ done
+
+ if [ $have_ct_tool -eq 1 ]; then
+ local count=$(ip netns exec "$ns" conntrack -C)
+ local duration=$((stop-outerstart))
+
+ if [ $count -eq $max_zones ]; then
+ echo "PASS: inserted $count entries from packet path in $duration ms total"
+ else
+ ip netns exec $ns conntrack -S 1>&2
+ echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
+ ret=1
+ fi
+ fi
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: insert $max_zones entries from packet path" 1>&2
+ fi
+}
+
+test_conntrack_tool() {
+ local max_zones=$1
+
+ ip netns exec $ns conntrack -F >/dev/null 2>/dev/null
+
+ local outerstart=$(date +%s%3N)
+ local start=$(date +%s%3N)
+ local stop=$start
+ local i=0
+ while [ $i -lt $max_zones ]; do
+ i=$((i + 1))
+ ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1
+ if [ $? -ne 0 ];then
+ ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null
+ echo "FAIL: conntrack -I returned an error"
+ ret=1
+ break
+ fi
+
+ if [ $((i%10000)) -eq 0 ];then
+ stop=$(date +%s%3N)
+
+ local duration=$((stop-start))
+ echo "PASS: added 10000 entries in $duration ms (now $i total)"
+ start=$stop
+ fi
+ done
+
+ local count=$(ip netns exec "$ns" conntrack -C)
+ local duration=$((stop-outerstart))
+
+ if [ $count -eq $max_zones ]; then
+ echo "PASS: inserted $count entries via ctnetlink in $duration ms"
+ else
+ ip netns exec $ns conntrack -S 1>&2
+ echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
+ ret=1
+ fi
+}
+
+test_zones $zones
+
+if [ $have_ct_tool -eq 1 ];then
+ test_conntrack_tool $zones
+else
+ echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
+ if [ $ret -eq 0 ];then
+ exit $ksft_skip
+ fi
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index 381d874cce99..1bddbe934204 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -22,7 +22,11 @@
* XXX: This is wrong on {mips, parisc, powerpc, sparc}.
*/
#undef O_LARGEFILE
+#ifdef __aarch64__
+#define O_LARGEFILE 0x20000
+#else
#define O_LARGEFILE 0x8000
+#endif
struct open_how_ext {
struct open_how inner;
@@ -155,7 +159,7 @@ struct flag_test {
int err;
};
-#define NUM_OPENAT2_FLAG_TESTS 24
+#define NUM_OPENAT2_FLAG_TESTS 25
void test_openat2_flags(void)
{
@@ -229,6 +233,11 @@ void test_openat2_flags(void)
{ .name = "invalid how.resolve and O_PATH",
.how.flags = O_PATH,
.how.resolve = 0x1337, .err = -EINVAL },
+
+ /* currently unknown upper 32 bit rejected. */
+ { .name = "currently unknown bit (1 << 63)",
+ .how.flags = O_RDONLY | (1ULL << 63),
+ .how.resolve = 0, .err = -EINVAL },
};
BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_FLAG_TESTS);
diff --git a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c
index 579f0215c6e7..9836838a529f 100644
--- a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c
+++ b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c
@@ -14,6 +14,7 @@
#include <time.h>
#include <sys/types.h>
#include <sys/time.h>
+#include <sys/syscall.h>
#include <signal.h>
static volatile int soak_done;
@@ -121,7 +122,7 @@ static void do_null_syscall(unsigned long nr)
unsigned long i;
for (i = 0; i < nr; i++)
- getppid();
+ syscall(__NR_gettid);
}
#define TIME(A, STR) \
diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
index 640fad6cc2c7..0785c2e99d40 100644
--- a/tools/testing/selftests/powerpc/nx-gzip/Makefile
+++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
@@ -1,8 +1,8 @@
-CFLAGS = -O3 -m64 -I./include
+CFLAGS = -O3 -m64 -I./include -I../include
TEST_GEN_FILES := gzfht_test gunz_test
TEST_PROGS := nx-gzip-test.sh
include ../../lib.mk
-$(TEST_GEN_FILES): gzip_vas.c
+$(TEST_GEN_FILES): gzip_vas.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
index b099753b50e4..095195a25687 100644
--- a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
@@ -60,6 +60,7 @@
#include <assert.h>
#include <errno.h>
#include <signal.h>
+#include "utils.h"
#include "nxu.h"
#include "nx.h"
@@ -70,6 +71,8 @@ FILE *nx_gzip_log;
#define FNAME_MAX 1024
#define FEXT ".nx.gz"
+#define SYSFS_MAX_REQ_BUF_PATH "devices/vio/ibm,compression-v1/nx_gzip_caps/req_max_processed_len"
+
/*
* LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
*/
@@ -244,6 +247,7 @@ int compress_file(int argc, char **argv, void *handle)
struct nx_gzip_crb_cpb_t *cmdp;
uint32_t pagelen = 65536;
int fault_tries = NX_MAX_FAULTS;
+ char buf[32];
cmdp = (void *)(uintptr_t)
aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
@@ -263,8 +267,17 @@ int compress_file(int argc, char **argv, void *handle)
assert(NULL != (outbuf = (char *)malloc(outlen)));
nxu_touch_pages(outbuf, outlen, pagelen, 1);
- /* Compress piecemeal in smallish chunks */
- chunk = 1<<22;
+ /*
+ * On PowerVM, the hypervisor defines the maximum request buffer
+ * size is defined and this value is available via sysfs.
+ */
+ if (!read_sysfs_file(SYSFS_MAX_REQ_BUF_PATH, buf, sizeof(buf))) {
+ chunk = atoi(buf);
+ } else {
+ /* sysfs entry is not available on PowerNV */
+ /* Compress piecemeal in smallish chunks */
+ chunk = 1<<22;
+ }
/* Write the gzip header to the stream */
num_hdr_bytes = gzip_header_blank(outbuf);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index c5ecb4634094..010160690227 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -24,7 +24,7 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \
fork_cleanup_test ebb_on_child_test \
ebb_on_willing_child_test back_to_back_ebbs_test \
lost_exception_test no_handler_test \
- cycles_with_mmcr2_test
+ cycles_with_mmcr2_test regs_access_pmccext_test
top_srcdir = ../../../../../..
include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
index b5bc2b616075..2c803b5b48d6 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
@@ -55,8 +55,6 @@ void ebb_global_disable(void);
bool ebb_is_supported(void);
void ebb_freeze_pmcs(void);
void ebb_unfreeze_pmcs(void);
-void event_ebb_init(struct event *e);
-void event_leader_ebb_init(struct event *e);
int count_pmc(int pmc, uint32_t sample_period);
void dump_ebb_state(void);
void dump_summary_ebb_state(void);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
index fc5bf4870d8e..01e827c31169 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
@@ -50,8 +50,6 @@ static int no_handler_test(void)
event_close(&event);
- dump_ebb_state();
-
/* The real test is that we never took an EBB at 0x0 */
return 0;
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c
new file mode 100644
index 000000000000..1eda8e9932e8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that closing the EBB event clears MMCR0_PMCC and
+ * sets MMCR0_PMCCEXT preventing further read access to the
+ * group B PMU registers.
+ */
+
+static int regs_access_pmccext(void)
+{
+ struct event event;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ while (ebb_state.stats.ebb_count < 1)
+ FAIL_IF(core_busy_loop());
+
+ ebb_global_disable();
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ /*
+ * For ISA v3.1, verify the test takes a SIGILL when reading
+ * PMU regs after the event is closed. With the control bit
+ * in MMCR0 (PMCCEXT) restricting access to group B PMU regs,
+ * sigill is expected.
+ */
+ if (have_hwcap2(PPC_FEATURE2_ARCH_3_1))
+ FAIL_IF(catch_sigill(dump_ebb_state));
+ else
+ dump_ebb_state();
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(regs_access_pmccext, "regs_access_pmccext");
+}
diff --git a/tools/testing/selftests/powerpc/primitives/asm/extable.h b/tools/testing/selftests/powerpc/primitives/asm/extable.h
new file mode 120000
index 000000000000..6385f059a951
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/extable.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/extable.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
index 82f7bdc2e5e6..67ca297c5cca 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
@@ -57,7 +57,7 @@ trans:
: [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2),
[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
[flt_2] "b" (&b), [cptr1] "b" (&cptr[1])
- : "memory", "r7", "r8", "r9", "r10",
+ : "memory", "r0", "r7", "r8", "r9", "r10",
"r11", "r12", "r13", "r14", "r15", "r16",
"r17", "r18", "r19", "r20", "r21", "r22",
"r23", "r24", "r25", "r26", "r27", "r28",
@@ -113,6 +113,7 @@ int ptrace_tm_gpr(void)
int ret, status;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
pid = fork();
if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
index ad65be6e8e85..6f2bce1b6c5d 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
@@ -65,7 +65,7 @@ trans:
: [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4),
[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
[flt_4] "b" (&d)
- : "memory", "r5", "r6", "r7",
+ : "memory", "r0", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
@@ -119,6 +119,7 @@ int ptrace_tm_spd_gpr(void)
int ret, status;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
pid = fork();
if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
index 2ecfa1158e2b..e112a34fbe59 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
@@ -129,6 +129,7 @@ int ptrace_tm_spd_tar(void)
int ret, status;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
pid = fork();
if (pid == 0)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
index 6f7fb51f0809..40133d49fe39 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
@@ -129,6 +129,7 @@ int ptrace_tm_spd_vsx(void)
int ret, status, i;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
for (i = 0; i < 128; i++) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
index 068bfed2e606..880ba6a29a48 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
@@ -114,6 +114,7 @@ int ptrace_tm_spr(void)
int ret, status;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
shm_id = shmget(IPC_PRIVATE, sizeof(struct shared), 0777|IPC_CREAT);
shm_id1 = shmget(IPC_PRIVATE, sizeof(int), 0777|IPC_CREAT);
pid = fork();
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
index 46ef378a15ec..d0db6df0f0ea 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
@@ -117,6 +117,7 @@ int ptrace_tm_tar(void)
int ret, status;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
pid = fork();
if (pid == 0)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
index 70ca01234f79..4f05ce4fd282 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
@@ -113,6 +113,7 @@ int ptrace_tm_vsx(void)
int ret, status, i;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
for (i = 0; i < 128; i++) {
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
index 844d18cd5f93..7488315fd847 100644
--- a/tools/testing/selftests/powerpc/security/Makefile
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: GPL-2.0+
TEST_GEN_PROGS := rfi_flush entry_flush uaccess_flush spectre_v2
+TEST_PROGS := mitigation-patching.sh
+
top_srcdir = ../../../../..
CFLAGS += -I../../../../../usr/include
diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh
new file mode 100755
index 000000000000..00197acb7ff1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh
@@ -0,0 +1,75 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+TIMEOUT=10
+
+function do_one
+{
+ local mitigation="$1"
+ local orig
+ local start
+ local now
+
+ orig=$(cat "$mitigation")
+
+ start=$EPOCHSECONDS
+ now=$start
+
+ while [[ $((now-start)) -lt "$TIMEOUT" ]]
+ do
+ echo 0 > "$mitigation"
+ echo 1 > "$mitigation"
+
+ now=$EPOCHSECONDS
+ done
+
+ echo "$orig" > "$mitigation"
+}
+
+rc=0
+cd /sys/kernel/debug/powerpc || rc=1
+if [[ "$rc" -ne 0 ]]; then
+ echo "Error: couldn't cd to /sys/kernel/debug/powerpc" >&2
+ exit 1
+fi
+
+tainted=$(cat /proc/sys/kernel/tainted)
+if [[ "$tainted" -ne 0 ]]; then
+ echo "Error: kernel already tainted!" >&2
+ exit 1
+fi
+
+mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush uaccess_flush"
+
+for m in $mitigations
+do
+ do_one "$m" &
+done
+
+echo "Spawned threads enabling/disabling mitigations ..."
+
+if stress-ng > /dev/null 2>&1; then
+ stress="stress-ng"
+elif stress > /dev/null 2>&1; then
+ stress="stress"
+else
+ stress=""
+fi
+
+if [[ -n "$stress" ]]; then
+ "$stress" -m "$(nproc)" -t "$TIMEOUT" &
+ echo "Spawned VM stressors ..."
+fi
+
+echo "Waiting for timeout ..."
+wait
+
+tainted=$(cat /proc/sys/kernel/tainted)
+if [[ "$tainted" -ne 0 ]]; then
+ echo "Error: kernel became tainted!" >&2
+ exit 1
+fi
+
+echo "OK"
+exit 0
diff --git a/tools/testing/selftests/powerpc/signal/signal_tm.c b/tools/testing/selftests/powerpc/signal/signal_tm.c
index 5bf2224ef7f2..c9cf66a3daa2 100644
--- a/tools/testing/selftests/powerpc/signal/signal_tm.c
+++ b/tools/testing/selftests/powerpc/signal/signal_tm.c
@@ -56,6 +56,7 @@ static int test_signal_tm()
}
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
for (i = 0; i < MAX_ATTEMPT; i++) {
/*
diff --git a/tools/testing/selftests/powerpc/tm/tm-exec.c b/tools/testing/selftests/powerpc/tm/tm-exec.c
index 260cfdb97d23..c59919d6710d 100644
--- a/tools/testing/selftests/powerpc/tm/tm-exec.c
+++ b/tools/testing/selftests/powerpc/tm/tm-exec.c
@@ -27,6 +27,7 @@ static char *path;
static int test_exec(void)
{
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
asm __volatile__(
"tbegin.;"
diff --git a/tools/testing/selftests/powerpc/tm/tm-fork.c b/tools/testing/selftests/powerpc/tm/tm-fork.c
index 6efa5a685a77..c27b935f0e9f 100644
--- a/tools/testing/selftests/powerpc/tm/tm-fork.c
+++ b/tools/testing/selftests/powerpc/tm/tm-fork.c
@@ -21,6 +21,7 @@
int test_fork(void)
{
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
asm __volatile__(
"tbegin.;"
diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c
index 29e5f26af7b9..a7bbf034b5bb 100644
--- a/tools/testing/selftests/powerpc/tm/tm-poison.c
+++ b/tools/testing/selftests/powerpc/tm/tm-poison.c
@@ -20,7 +20,6 @@
#include <sched.h>
#include <sys/types.h>
#include <signal.h>
-#include <inttypes.h>
#include "tm.h"
@@ -34,6 +33,7 @@ int tm_poison_test(void)
bool fail_vr = false;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
cpu = pick_online_cpu();
FAIL_IF(cpu < 0);
diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
index 4cdb83964bb3..85c940ae6ff8 100644
--- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
@@ -40,6 +40,7 @@ int test_body(void)
uint64_t rv, dscr1 = 1, dscr2, texasr;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
printf("Check DSCR TM context switch: ");
fflush(stdout);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
index 254f912ad611..657d755b2905 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
@@ -79,6 +79,7 @@ static int tm_signal_context_chk_fpu()
pid_t pid = getpid();
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
act.sa_sigaction = signal_usr1;
sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
index 0cc680f61828..400fa70ca71e 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
@@ -81,6 +81,7 @@ static int tm_signal_context_chk_gpr()
pid_t pid = getpid();
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
act.sa_sigaction = signal_usr1;
sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
index b6d52730a0d8..d628fd302b28 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
@@ -104,6 +104,7 @@ static int tm_signal_context_chk()
pid_t pid = getpid();
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
act.sa_sigaction = signal_usr1;
sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
index 8e25e2072ecd..9bd869245bad 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
@@ -153,6 +153,7 @@ static int tm_signal_context_chk()
pid_t pid = getpid();
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
act.sa_sigaction = signal_usr1;
sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
index 5908bc6abe60..0b84c9208d62 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
@@ -226,6 +226,7 @@ int tm_signal_pagefault(void)
stack_t ss;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
SKIP_IF(!have_userfaultfd());
setup_uf_mem();
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
index 07c388147b75..06b801906f27 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
@@ -32,6 +32,7 @@ int tm_signal_sigreturn_nt(void)
struct sigaction trap_sa;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
trap_sa.sa_flags = SA_SIGINFO;
trap_sa.sa_sigaction = trap_signal_handler;
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
index cdcf8c5bbbc7..68807aac8dd3 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
@@ -35,6 +35,7 @@ int tm_signal_stack()
int pid;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
pid = fork();
if (pid < 0)
diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
index 9a6017a1d769..ffe4e5515f33 100644
--- a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
+++ b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
@@ -55,6 +55,7 @@ int tm_sigreturn(void)
uint64_t ret = 0;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
SKIP_IF(!is_ppc64le());
memset(&sa, 0, sizeof(sa));
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
index bd1ca25febe4..aed632d29fff 100644
--- a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#include <ppc-asm.h>
+#include <basic_asm.h>
#include <asm/unistd.h>
.text
@@ -26,3 +26,38 @@ FUNC_START(getppid_tm_suspended)
1:
li r3, -1
blr
+
+
+.macro scv level
+ .long (0x44000001 | (\level) << 5)
+.endm
+
+FUNC_START(getppid_scv_tm_active)
+ PUSH_BASIC_STACK(0)
+ tbegin.
+ beq 1f
+ li r0, __NR_getppid
+ scv 0
+ tend.
+ POP_BASIC_STACK(0)
+ blr
+1:
+ li r3, -1
+ POP_BASIC_STACK(0)
+ blr
+
+FUNC_START(getppid_scv_tm_suspended)
+ PUSH_BASIC_STACK(0)
+ tbegin.
+ beq 1f
+ li r0, __NR_getppid
+ tsuspend.
+ scv 0
+ tresume.
+ tend.
+ POP_BASIC_STACK(0)
+ blr
+1:
+ li r3, -1
+ POP_BASIC_STACK(0)
+ blr
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c
index becb8207b432..b763354c2eb4 100644
--- a/tools/testing/selftests/powerpc/tm/tm-syscall.c
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -19,24 +19,36 @@
#include "utils.h"
#include "tm.h"
+#ifndef PPC_FEATURE2_SCV
+#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */
+#endif
+
extern int getppid_tm_active(void);
extern int getppid_tm_suspended(void);
+extern int getppid_scv_tm_active(void);
+extern int getppid_scv_tm_suspended(void);
unsigned retries = 0;
#define TEST_DURATION 10 /* seconds */
-#define TM_RETRIES 100
-pid_t getppid_tm(bool suspend)
+pid_t getppid_tm(bool scv, bool suspend)
{
int i;
pid_t pid;
for (i = 0; i < TM_RETRIES; i++) {
- if (suspend)
- pid = getppid_tm_suspended();
- else
- pid = getppid_tm_active();
+ if (suspend) {
+ if (scv)
+ pid = getppid_scv_tm_suspended();
+ else
+ pid = getppid_tm_suspended();
+ } else {
+ if (scv)
+ pid = getppid_scv_tm_active();
+ else
+ pid = getppid_tm_active();
+ }
if (pid >= 0)
return pid;
@@ -67,6 +79,7 @@ int tm_syscall(void)
struct timeval end, now;
SKIP_IF(!have_htm_nosc());
+ SKIP_IF(htm_is_synthetic());
setbuf(stdout, NULL);
@@ -82,15 +95,24 @@ int tm_syscall(void)
* Test a syscall within a suspended transaction and verify
* that it succeeds.
*/
- FAIL_IF(getppid_tm(true) == -1); /* Should succeed. */
+ FAIL_IF(getppid_tm(false, true) == -1); /* Should succeed. */
/*
* Test a syscall within an active transaction and verify that
* it fails with the correct failure code.
*/
- FAIL_IF(getppid_tm(false) != -1); /* Should fail... */
+ FAIL_IF(getppid_tm(false, false) != -1); /* Should fail... */
FAIL_IF(!failure_is_persistent()); /* ...persistently... */
FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */
+
+ /* Now do it all again with scv if it is available. */
+ if (have_hwcap2(PPC_FEATURE2_SCV)) {
+ FAIL_IF(getppid_tm(true, true) == -1); /* Should succeed. */
+ FAIL_IF(getppid_tm(true, false) != -1); /* Should fail... */
+ FAIL_IF(!failure_is_persistent()); /* ...persistently... */
+ FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */
+ }
+
gettimeofday(&now, 0);
}
diff --git a/tools/testing/selftests/powerpc/tm/tm-tar.c b/tools/testing/selftests/powerpc/tm/tm-tar.c
index 03be8c47292b..f2a9137f3c1e 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tar.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tar.c
@@ -26,6 +26,7 @@ int test_tar(void)
int i;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
SKIP_IF(!is_ppc64le());
for (i = 0; i < num_loops; i++)
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
index 794d574db784..dd5ddffa28b7 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -96,6 +96,7 @@ int test_tmspr()
unsigned long i;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
/* To cause some context switching */
thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN);
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
index 11521077f915..97cb74768e30 100644
--- a/tools/testing/selftests/powerpc/tm/tm-trap.c
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -255,6 +255,7 @@ int tm_trap_test(void)
struct sigaction trap_sa;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
trap_sa.sa_flags = SA_SIGINFO;
trap_sa.sa_sigaction = trap_signal_handler;
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index a1348a5f721a..6bf1b65b020d 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -344,6 +344,7 @@ int tm_unavailable_test(void)
cpu_set_t cpuset;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
cpu = pick_online_cpu();
FAIL_IF(cpu < 0);
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
index e2a0c07e8362..34364ed2b6b7 100644
--- a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
+++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
@@ -17,7 +17,6 @@
#include <pthread.h>
#include <sys/mman.h>
#include <unistd.h>
-#include <pthread.h>
#include "tm.h"
#include "utils.h"
@@ -92,6 +91,7 @@ int tm_vmx_unavail_test()
pthread_t *thread;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
passed = 1;
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
index c1e788a6df47..1640e7ead69b 100644
--- a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
+++ b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
@@ -46,6 +46,7 @@ int test_vmxcopy()
uint64_t aborted = 0;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
SKIP_IF(!is_ppc64le());
fd = mkstemp(tmpfile);
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
index c5a1e5c163fc..c03c6e778876 100644
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -10,6 +10,9 @@
#include <asm/tm.h>
#include "utils.h"
+#include "reg.h"
+
+#define TM_RETRIES 100
static inline bool have_htm(void)
{
@@ -31,6 +34,39 @@ static inline bool have_htm_nosc(void)
#endif
}
+/*
+ * Transactional Memory was removed in ISA 3.1. A synthetic TM implementation
+ * is provided on P10 for threads running in P8/P9 compatibility mode. The
+ * synthetic implementation immediately fails after tbegin. This failure sets
+ * Bit 7 (Failure Persistent) and Bit 15 (Implementation-specific).
+ */
+static inline bool htm_is_synthetic(void)
+{
+ int i;
+
+ /*
+ * Per the ISA, the Failure Persistent bit may be incorrect. Try a few
+ * times in case we got an Implementation-specific failure on a non ISA
+ * v3.1 system. On these systems the Implementation-specific failure
+ * should not be persistent.
+ */
+ for (i = 0; i < TM_RETRIES; i++) {
+ asm volatile(
+ "tbegin.;"
+ "beq 1f;"
+ "tend.;"
+ "1:"
+ :
+ :
+ : "memory");
+
+ if ((__builtin_get_texasr() & (TEXASR_FP | TEXASR_IC)) !=
+ (TEXASR_FP | TEXASR_IC))
+ break;
+ }
+ return i == TM_RETRIES;
+}
+
static inline long failure_code(void)
{
return __builtin_get_texasru() >> 24;
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
index 15d937ba96ca..fd1ffaa5a135 100755
--- a/tools/testing/selftests/rcutorture/bin/jitter.sh
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -68,16 +68,12 @@ do
cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
srand(n + me + systime());
ncpus = split(cpus, ca);
- curcpu = ca[int(rand() * ncpus + 1)];
- z = "";
- for (i = 1; 4 * i <= curcpu; i++)
- z = z "0";
- print "0x" 2 ^ (curcpu % 4) z;
+ print ca[int(rand() * ncpus + 1)];
}' < /dev/null`
n=$(($n+1))
- if ! taskset -p $cpumask $$ > /dev/null 2>&1
+ if ! taskset -c -p $cpumask $$ > /dev/null 2>&1
then
- echo taskset failure: '"taskset -p ' $cpumask $$ '"'
+ echo taskset failure: '"taskset -c -p ' $cpumask $$ '"'
exit 1
fi
diff --git a/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
index e5cc6b2f195e..1af5d6b86b39 100755
--- a/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
+++ b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
@@ -14,7 +14,7 @@ if test -z "$TORTURE_KCONFIG_KCSAN_ARG"
then
exit 0
fi
-cat $1/*/console.log |
+find $1 -name console.log -exec cat {} \; |
grep "BUG: KCSAN: " |
sed -e 's/^\[[^]]*] //' |
sort |
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
index 46e47a00a7db..5a0023d183da 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-again.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -29,7 +29,7 @@ then
echo "Usage: $scriptname /path/to/old/run [ options ]"
exit 1
fi
-if ! cp "$oldrun/batches" $T/batches.oldrun
+if ! cp "$oldrun/scenarios" $T/scenarios.oldrun
then
# Later on, can reconstitute this from console.log files.
echo Prior run batches file does not exist: $oldrun/batches
@@ -142,7 +142,9 @@ then
echo "Cannot copy from $oldrun to $rundir."
usage
fi
-rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
+rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
+touch "$rundir/log"
+echo $scriptname $args | tee -a "$rundir/log"
echo $oldrun > "$rundir/re-run"
if ! test -d "$rundir/../../bin"
then
@@ -165,35 +167,18 @@ done
grep '^#' $i | sed -e 's/^# //' > $T/qemu-cmd-settings
. $T/qemu-cmd-settings
-grep -v '^#' $T/batches.oldrun | awk '
-BEGIN {
- oldbatch = 1;
-}
-
+grep -v '^#' $T/scenarios.oldrun | awk '
{
- if (oldbatch != $1) {
- print "kvm-test-1-run-batch.sh" curbatch;
- curbatch = "";
- oldbatch = $1;
- }
- curbatch = curbatch " " $2;
-}
-
-END {
- print "kvm-test-1-run-batch.sh" curbatch
+ curbatch = "";
+ for (i = 2; i <= NF; i++)
+ curbatch = curbatch " " $i;
+ print "kvm-test-1-run-batch.sh" curbatch;
}' > $T/runbatches.sh
if test -n "$dryrun"
then
echo ---- Dryrun complete, directory: $rundir | tee -a "$rundir/log"
else
- ( cd "$rundir"; sh $T/runbatches.sh )
- kcsan-collapse.sh "$rundir" | tee -a "$rundir/log"
- echo | tee -a "$rundir/log"
- echo ---- Results directory: $rundir | tee -a "$rundir/log"
- kvm-recheck.sh "$rundir" > $T/kvm-recheck.sh.out 2>&1
- ret=$?
- cat $T/kvm-recheck.sh.out | tee -a "$rundir/log"
- echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a "$rundir/log"
- exit $ret
+ ( cd "$rundir"; sh $T/runbatches.sh ) | tee -a "$rundir/log"
+ kvm-end-run-stats.sh "$rundir" "$starttime"
fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh
new file mode 100755
index 000000000000..f99b2c146f83
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh
@@ -0,0 +1,106 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Produce awk statements roughly depicting the system's CPU and cache
+# layout. If the required information is not available, produce
+# error messages as awk comments. Successful exit regardless.
+#
+# Usage: kvm-assign-cpus.sh /path/to/sysfs
+
+T=/tmp/kvm-assign-cpus.sh.$$
+trap 'rm -rf $T' 0 2
+mkdir $T
+
+sysfsdir=${1-/sys/devices/system/node}
+if ! cd "$sysfsdir" > $T/msg 2>&1
+then
+ sed -e 's/^/# /' < $T/msg
+ exit 0
+fi
+nodelist="`ls -d node*`"
+for i in node*
+do
+ if ! test -d $i/
+ then
+ echo "# Not a directory: $sysfsdir/node*"
+ exit 0
+ fi
+ for j in $i/cpu*/cache/index*
+ do
+ if ! test -d $j/
+ then
+ echo "# Not a directory: $sysfsdir/$j"
+ exit 0
+ else
+ break
+ fi
+ done
+ indexlist="`ls -d $i/cpu* | grep 'cpu[0-9][0-9]*' | head -1 | sed -e 's,^.*$,ls -d &/cache/index*,' | sh | sed -e 's,^.*/,,'`"
+ break
+done
+for i in node*/cpu*/cache/index*/shared_cpu_list
+do
+ if ! test -f $i
+ then
+ echo "# Not a file: $sysfsdir/$i"
+ exit 0
+ else
+ break
+ fi
+done
+firstshared=
+for i in $indexlist
+do
+ rm -f $T/cpulist
+ for n in node*
+ do
+ f="$n/cpu*/cache/$i/shared_cpu_list"
+ if ! cat $f > $T/msg 2>&1
+ then
+ sed -e 's/^/# /' < $T/msg
+ exit 0
+ fi
+ cat $f >> $T/cpulist
+ done
+ if grep -q '[-,]' $T/cpulist
+ then
+ if test -z "$firstshared"
+ then
+ firstshared="$i"
+ fi
+ fi
+done
+if test -z "$firstshared"
+then
+ splitindex="`echo $indexlist | sed -e 's/ .*$//'`"
+else
+ splitindex="$firstshared"
+fi
+nodenum=0
+for n in node*
+do
+ cat $n/cpu*/cache/$splitindex/shared_cpu_list | sort -u -k1n |
+ awk -v nodenum="$nodenum" '
+ BEGIN {
+ idx = 0;
+ }
+
+ {
+ nlists = split($0, cpulists, ",");
+ for (i = 1; i <= nlists; i++) {
+ listsize = split(cpulists[i], cpus, "-");
+ if (listsize == 1)
+ cpus[2] = cpus[1];
+ for (j = cpus[1]; j <= cpus[2]; j++) {
+ print "cpu[" nodenum "][" idx "] = " j ";";
+ idx++;
+ }
+ }
+ }
+
+ END {
+ print "nodecpus[" nodenum "] = " idx ";";
+ }'
+ nodenum=`expr $nodenum + 1`
+done
+echo "numnodes = $nodenum;"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index 115e1822b26f..5ad973dca820 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -40,8 +40,10 @@ if test $retval -gt 1
then
exit 2
fi
-ncpus=`cpus2use.sh`
-make -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
+
+# Tell "make" to use double the number of real CPUs on the build system.
+ncpus="`getconf _NPROCESSORS_ONLN`"
+make -j$((2 * ncpus)) $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
retval=$?
if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
new file mode 100755
index 000000000000..e4a00779b8c6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Check the status of the specified run.
+#
+# Usage: kvm-end-run-stats.sh /path/to/run starttime
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+# scriptname=$0
+# args="$*"
+rundir="$1"
+if ! test -d "$rundir"
+then
+ echo kvm-end-run-stats.sh: Specified run directory does not exist: $rundir
+ exit 1
+fi
+
+T=${TMPDIR-/tmp}/kvm-end-run-stats.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
+PATH=${KVM}/bin:$PATH; export PATH
+. functions.sh
+default_starttime="`get_starttime`"
+starttime="${2-default_starttime}"
+
+echo | tee -a "$rundir/log"
+echo | tee -a "$rundir/log"
+echo " --- `date` Test summary:" | tee -a "$rundir/log"
+echo Results directory: $rundir | tee -a "$rundir/log"
+kcsan-collapse.sh "$rundir" | tee -a "$rundir/log"
+kvm-recheck.sh "$rundir" > $T/kvm-recheck.sh.out 2>&1
+ret=$?
+cat $T/kvm-recheck.sh.out | tee -a "$rundir/log"
+echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a "$rundir/log"
+exit $ret
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index 0670841122d8..daf64b507038 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -43,7 +43,7 @@ then
else
echo No build errors.
fi
-if grep -q -e "--buildonly" < ${rundir}/log
+if grep -q -e "--build-\?only" < ${rundir}/log && ! test -f "${rundir}/remote-log"
then
echo Build-only run, no console logs to check.
exit $editorret
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh b/tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh
new file mode 100755
index 000000000000..20c7c53c5795
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh
@@ -0,0 +1,88 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Create an awk script that takes as input numbers of CPUs and outputs
+# lists of CPUs, one per line in both cases.
+#
+# Usage: kvm-get-cpus-script.sh /path/to/cpu/arrays /path/to/put/script [ /path/to/state ]
+#
+# The CPU arrays are output by kvm-assign-cpus.sh, and are valid awk
+# statements initializing the variables describing the system's topology.
+#
+# The optional state is input by this script (if the file exists and is
+# non-empty), and can also be output by this script.
+
+cpuarrays="${1-/sys/devices/system/node}"
+scriptfile="${2}"
+statefile="${3}"
+
+if ! test -f "$cpuarrays"
+then
+ echo "File not found: $cpuarrays" 1>&2
+ exit 1
+fi
+scriptdir="`dirname "$scriptfile"`"
+if ! test -d "$scriptdir" || ! test -x "$scriptdir" || ! test -w "$scriptdir"
+then
+ echo "Directory not usable for script output: $scriptdir"
+ exit 1
+fi
+
+cat << '___EOF___' > "$scriptfile"
+BEGIN {
+___EOF___
+cat "$cpuarrays" >> "$scriptfile"
+if test -r "$statefile"
+then
+ cat "$statefile" >> "$scriptfile"
+fi
+cat << '___EOF___' >> "$scriptfile"
+}
+
+# Do we have the system architecture to guide CPU affinity?
+function gotcpus()
+{
+ return numnodes != "";
+}
+
+# Return a comma-separated list of the next n CPUs.
+function nextcpus(n, i, s)
+{
+ for (i = 0; i < n; i++) {
+ if (nodecpus[curnode] == "")
+ curnode = 0;
+ if (cpu[curnode][curcpu[curnode]] == "")
+ curcpu[curnode] = 0;
+ if (s != "")
+ s = s ",";
+ s = s cpu[curnode][curcpu[curnode]];
+ curcpu[curnode]++;
+ curnode++
+ }
+ return s;
+}
+
+# Dump out the current node/CPU state so that a later invocation of this
+# script can continue where this one left off. Of course, this only works
+# when a state file was specified and where there was valid sysfs state.
+# Returns 1 if the state was dumped, 0 otherwise.
+#
+# Dumping the state for one system configuration and loading it into
+# another isn't likely to do what you want, whatever that might be.
+function dumpcpustate( i, fn)
+{
+___EOF___
+echo ' fn = "'"$statefile"'";' >> $scriptfile
+cat << '___EOF___' >> "$scriptfile"
+ if (fn != "" && gotcpus()) {
+ print "curnode = " curnode ";" > fn;
+ for (i = 0; i < numnodes; i++)
+ if (curcpu[i] != "")
+ print "curcpu[" i "] = " curcpu[i] ";" >> fn;
+ return 1;
+ }
+ if (fn != "")
+ print "# No CPU state to dump." > fn;
+ return 0;
+}
+___EOF___
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
index f3a7a5e2b89d..db2c0e2c8e1d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
@@ -25,7 +25,7 @@ then
echo "$configfile -------"
else
title="$configfile ------- $ncs acquisitions/releases"
- dur=`sed -e 's/^.* locktorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
+ dur=`grep -v '^#' $i/qemu-cmd | sed -e 's/^.* locktorture.shutdown_secs=//' -e 's/ .*$//' 2> /dev/null`
if test -z "$dur"
then
:
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index 1706cd4466b4..fbdf162b6acd 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -31,7 +31,7 @@ then
echo "$configfile ------- " $stopstate
else
title="$configfile ------- $ngps GPs"
- dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
+ dur=`grep -v '^#' $i/qemu-cmd | sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//'`
if test -z "$dur"
then
:
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
index 671bfee4fcef..3afa5c6eda4f 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
@@ -25,7 +25,7 @@ if test -z "$nscfs"
then
echo "$configfile ------- "
else
- dur="`sed -e 's/^.* scftorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`"
+ dur="`grep -v '^#' $i/qemu-cmd | sed -e 's/^.* scftorture.shutdown_secs=//' -e 's/ .*$//' 2> /dev/null`"
if test -z "$dur"
then
rate=""
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index e01b31b87044..0a5419982ab3 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -74,7 +74,10 @@ do
done
if test -f "$rd/kcsan.sum"
then
- if grep -q CONFIG_KCSAN=y $T
+ if ! test -f $T
+ then
+ :
+ elif grep -q CONFIG_KCSAN=y $T
then
echo "Compiler or architecture does not support KCSAN!"
echo Did you forget to switch your compiler with '--kmake-arg CC=<cc-that-supports-kcsan>'?
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh
new file mode 100755
index 000000000000..014ce68260d7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Periodically scan a directory tree to prevent files from being reaped
+# by systemd and friends on long runs.
+#
+# Usage: kvm-remote-noreap.sh pathname
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+pathname="$1"
+if test "$pathname" = ""
+then
+ echo Usage: kvm-remote-noreap.sh pathname
+ exit 1
+fi
+if ! test -d "$pathname"
+then
+ echo Usage: kvm-remote-noreap.sh pathname
+ echo " pathname must be a directory."
+ exit 2
+fi
+
+while test -d "$pathname"
+do
+ find "$pathname" -type f -exec touch -c {} \; > /dev/null 2>&1
+ sleep 30
+done
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
new file mode 100755
index 000000000000..03126eb6ec5a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -0,0 +1,261 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Run a series of tests on remote systems under KVM.
+#
+# Usage: kvm-remote.sh "systems" [ <kvm.sh args> ]
+# kvm-remote.sh "systems" /path/to/old/run [ <kvm-again.sh args> ]
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+scriptname=$0
+args="$*"
+
+if ! test -d tools/testing/selftests/rcutorture/bin
+then
+ echo $scriptname must be run from top-level directory of kernel source tree.
+ exit 1
+fi
+
+KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
+PATH=${KVM}/bin:$PATH; export PATH
+. functions.sh
+
+starttime="`get_starttime`"
+
+systems="$1"
+if test -z "$systems"
+then
+ echo $scriptname: Empty list of systems will go nowhere good, giving up.
+ exit 1
+fi
+shift
+
+# Pathnames:
+# T: /tmp/kvm-remote.sh.$$
+# resdir: /tmp/kvm-remote.sh.$$/res
+# rundir: /tmp/kvm-remote.sh.$$/res/$ds ("-remote" suffix)
+# oldrun: `pwd`/tools/testing/.../res/$otherds
+#
+# Pathname segments:
+# TD: kvm-remote.sh.$$
+# ds: yyyy.mm.dd-hh.mm.ss-remote
+
+TD=kvm-remote.sh.$$
+T=${TMPDIR-/tmp}/$TD
+trap 'rm -rf $T' 0
+mkdir $T
+
+resdir="$T/res"
+ds=`date +%Y.%m.%d-%H.%M.%S`-remote
+rundir=$resdir/$ds
+echo Results directory: $rundir
+echo $scriptname $args
+if echo $1 | grep -q '^--'
+then
+ # Fresh build. Create a datestamp unless the caller supplied one.
+ datestamp="`echo "$@" | awk -v ds="$ds" '{
+ for (i = 1; i < NF; i++) {
+ if ($i == "--datestamp") {
+ ds = "";
+ break;
+ }
+ }
+ if (ds != "")
+ print "--datestamp " ds;
+ }'`"
+ kvm.sh --remote "$@" $datestamp --buildonly > $T/kvm.sh.out 2>&1
+ ret=$?
+ if test "$ret" -ne 0
+ then
+ echo $scriptname: kvm.sh failed exit code $?
+ cat $T/kvm.sh.out
+ exit 2
+ fi
+ oldrun="`grep -m 1 "^Results directory: " $T/kvm.sh.out | awk '{ print $3 }'`"
+ touch "$oldrun/remote-log"
+ echo $scriptname $args >> "$oldrun/remote-log"
+ echo | tee -a "$oldrun/remote-log"
+ echo " ----" kvm.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
+ cat $T/kvm.sh.out | tee -a "$oldrun/remote-log"
+ # We are going to run this, so remove the buildonly files.
+ rm -f "$oldrun"/*/buildonly
+ kvm-again.sh $oldrun --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
+ ret=$?
+ if test "$ret" -ne 0
+ then
+ echo $scriptname: kvm-again.sh failed exit code $? | tee -a "$oldrun/remote-log"
+ cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
+ exit 2
+ fi
+else
+ # Re-use old run.
+ oldrun="$1"
+ if ! echo $oldrun | grep -q '^/'
+ then
+ oldrun="`pwd`/$oldrun"
+ fi
+ shift
+ touch "$oldrun/remote-log"
+ echo $scriptname $args >> "$oldrun/remote-log"
+ kvm-again.sh "$oldrun" "$@" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
+ ret=$?
+ if test "$ret" -ne 0
+ then
+ echo $scriptname: kvm-again.sh failed exit code $? | tee -a "$oldrun/remote-log"
+ cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
+ exit 2
+ fi
+ cp -a "$rundir" "$KVM/res/"
+ oldrun="$KVM/res/$ds"
+fi
+echo | tee -a "$oldrun/remote-log"
+echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
+cat $T/kvm-again.sh.out
+echo | tee -a "$oldrun/remote-log"
+echo Remote run directory: $rundir | tee -a "$oldrun/remote-log"
+echo Local build-side run directory: $oldrun | tee -a "$oldrun/remote-log"
+
+# Create the kvm-remote-N.sh scripts in the bin directory.
+awk < "$rundir"/scenarios -v dest="$T/bin" -v rundir="$rundir" '
+{
+ n = $1;
+ sub(/\./, "", n);
+ fn = dest "/kvm-remote-" n ".sh"
+ print "kvm-remote-noreap.sh " rundir " &" > fn;
+ scenarios = "";
+ for (i = 2; i <= NF; i++)
+ scenarios = scenarios " " $i;
+ print "kvm-test-1-run-batch.sh" scenarios >> fn;
+ print "sync" >> fn;
+ print "rm " rundir "/remote.run" >> fn;
+}'
+chmod +x $T/bin/kvm-remote-*.sh
+( cd "`dirname $T`"; tar -chzf $T/binres.tgz "$TD/bin" "$TD/res" )
+
+# Check first to avoid the need for cleanup for system-name typos
+for i in $systems
+do
+ ncpus="`ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
+ echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
+ ret=$?
+ if test "$ret" -ne 0
+ then
+ echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
+ exit 4 | tee -a "$oldrun/remote-log"
+ fi
+done
+
+# Download and expand the tarball on all systems.
+for i in $systems
+do
+ echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
+ cat $T/binres.tgz | ssh $i "cd /tmp; tar -xzf -"
+ ret=$?
+ if test "$ret" -ne 0
+ then
+ echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
+ exit 10 | tee -a "$oldrun/remote-log"
+ fi
+done
+
+# Function to check for presence of a file on the specified system.
+# Complain if the system cannot be reached, and retry after a wait.
+# Currently just waits forever if a machine disappears.
+#
+# Usage: checkremotefile system pathname
+checkremotefile () {
+ local ret
+ local sleeptime=60
+
+ while :
+ do
+ ssh $1 "test -f \"$2\""
+ ret=$?
+ if test "$ret" -eq 255
+ then
+ echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date`
+ elif test "$ret" -eq 0
+ then
+ return 0
+ elif test "$ret" -eq 1
+ then
+ echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\"
+ return 1
+ else
+ echo " ---" Exit code $ret: ssh $1 test -f \"$2\", retry after $sleeptime seconds. `date`
+ return $ret
+ fi
+ sleep $sleeptime
+ done
+}
+
+# Function to start batches on idle remote $systems
+#
+# Usage: startbatches curbatch nbatches
+#
+# Batches are numbered starting at 1. Returns the next batch to start.
+# Be careful to redirect all debug output to FD 2 (stderr).
+startbatches () {
+ local curbatch="$1"
+ local nbatches="$2"
+ local ret
+
+ # Each pass through the following loop examines one system.
+ for i in $systems
+ do
+ if test "$curbatch" -gt "$nbatches"
+ then
+ echo $((nbatches + 1))
+ return 0
+ fi
+ if checkremotefile "$i" "$resdir/$ds/remote.run" 1>&2
+ then
+ continue # System still running last test, skip.
+ fi
+ ssh "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
+ ret=$?
+ if test "$ret" -ne 0
+ then
+ echo ssh $i failed: exitcode $ret 1>&2
+ exit 11
+ fi
+ echo " ----" System $i Batch `head -n $curbatch < "$rundir"/scenarios | tail -1` `date` 1>&2
+ curbatch=$((curbatch + 1))
+ done
+ echo $curbatch
+}
+
+# Launch all the scenarios.
+nbatches="`wc -l "$rundir"/scenarios | awk '{ print $1 }'`"
+curbatch=1
+while test "$curbatch" -le "$nbatches"
+do
+ startbatches $curbatch $nbatches > $T/curbatch 2> $T/startbatches.stderr
+ curbatch="`cat $T/curbatch`"
+ if test -s "$T/startbatches.stderr"
+ then
+ cat "$T/startbatches.stderr" | tee -a "$oldrun/remote-log"
+ fi
+ if test "$curbatch" -le "$nbatches"
+ then
+ sleep 30
+ fi
+done
+echo All batches started. `date`
+
+# Wait for all remaining scenarios to complete and collect results.
+for i in $systems
+do
+ while checkremotefile "$i" "$resdir/$ds/remote.run"
+ do
+ sleep 30
+ done
+ echo " ---" Collecting results from $i `date`
+ ( cd "$oldrun"; ssh $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
+done
+
+( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
+exit "`cat $T/exitcode`"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
index 7ea0809e229e..1e29d656501b 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
@@ -50,10 +50,34 @@ grep '^#' $1/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings
echo ---- System running test: `uname -a`
echo ---- Starting kernels. `date` | tee -a log
$TORTURE_JITTER_START
+kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk
for i in "$@"
do
echo ---- System running test: `uname -a` > $i/kvm-test-1-run-qemu.sh.out
echo > $i/kvm-test-1-run-qemu.sh.out
+ export TORTURE_AFFINITY=
+ kvm-get-cpus-script.sh $T/cpuarray.awk $T/cpubatches.awk $T/cpustate
+ cat << ' ___EOF___' >> $T/cpubatches.awk
+ END {
+ affinitylist = "";
+ if (!gotcpus()) {
+ print "echo No CPU-affinity information, so no taskset command.";
+ } else if (cpu_count !~ /^[0-9][0-9]*$/) {
+ print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command.";
+ } else {
+ affinitylist = nextcpus(cpu_count);
+ if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/))
+ print "echo " scenario ": Bogus CPU-affinity information, so no taskset command.";
+ else if (!dumpcpustate())
+ print "echo " scenario ": Could not dump state, so no taskset command.";
+ else
+ print "export TORTURE_AFFINITY=" affinitylist;
+ }
+ }
+ ___EOF___
+ cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`"
+ affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`"
+ $affinity_export
kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 &
done
for i in $runfiles
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
index 5b1aa2a4f3f6..44280582c594 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
@@ -39,27 +39,34 @@ echo ' ---' `date`: Starting kernel, PID $$
grep '^#' $resdir/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings
. $T/qemu-cmd-settings
-# Decorate qemu-cmd with redirection, backgrounding, and PID capture
-sed -e 's/$/ 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd
-echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd
+# Decorate qemu-cmd with affinity, redirection, backgrounding, and PID capture
+taskset_command=
+if test -n "$TORTURE_AFFINITY"
+then
+ taskset_command="taskset -c $TORTURE_AFFINITY "
+fi
+sed -e 's/^[^#].*$/'"$taskset_command"'& 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd
+echo 'qemu_pid=$!' >> $T/qemu-cmd
+echo 'echo $qemu_pid > $resdir/qemu-pid' >> $T/qemu-cmd
+echo 'taskset -c -p $qemu_pid > $resdir/qemu-affinity' >> $T/qemu-cmd
# In case qemu refuses to run...
echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
# Attempt to run qemu
kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
-( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
+( . $T/qemu-cmd; wait `cat $resdir/qemu-pid`; echo $? > $resdir/qemu-retval ) &
commandcompleted=0
if test -z "$TORTURE_KCONFIG_GDB_ARG"
then
sleep 10 # Give qemu's pid a chance to reach the file
- if test -s "$resdir/qemu_pid"
+ if test -s "$resdir/qemu-pid"
then
- qemu_pid=`cat "$resdir/qemu_pid"`
- echo Monitoring qemu job at pid $qemu_pid
+ qemu_pid=`cat "$resdir/qemu-pid"`
+ echo Monitoring qemu job at pid $qemu_pid `date`
else
qemu_pid=""
- echo Monitoring qemu job at yet-as-unknown pid
+ echo Monitoring qemu job at yet-as-unknown pid `date`
fi
fi
if test -n "$TORTURE_KCONFIG_GDB_ARG"
@@ -82,9 +89,9 @@ then
fi
while :
do
- if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+ if test -z "$qemu_pid" && test -s "$resdir/qemu-pid"
then
- qemu_pid=`cat "$resdir/qemu_pid"`
+ qemu_pid=`cat "$resdir/qemu-pid"`
fi
kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
@@ -115,22 +122,22 @@ do
break
fi
done
-if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+if test -z "$qemu_pid" && test -s "$resdir/qemu-pid"
then
- qemu_pid=`cat "$resdir/qemu_pid"`
+ qemu_pid=`cat "$resdir/qemu-pid"`
fi
-if test $commandcompleted -eq 0 -a -n "$qemu_pid"
+if test $commandcompleted -eq 0 && test -n "$qemu_pid"
then
if ! test -f "$resdir/../STOP.1"
then
- echo Grace period for qemu job at pid $qemu_pid
+ echo Grace period for qemu job at pid $qemu_pid `date`
fi
oldline="`tail $resdir/console.log`"
while :
do
if test -f "$resdir/../STOP.1"
then
- echo "PID $qemu_pid killed due to run STOP.1 request" >> $resdir/Warnings 2>&1
+ echo "PID $qemu_pid killed due to run STOP.1 request `date`" >> $resdir/Warnings 2>&1
kill -KILL $qemu_pid
break
fi
@@ -152,13 +159,17 @@ then
then
last_ts=0
fi
- if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ if test "$newline" != "$oldline" && test "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE)) && test "$last_ts" -gt "$TORTURE_SHUTDOWN_GRACE"
then
must_continue=yes
+ if test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ then
+ echo Continuing at console.log time $last_ts \"`tail -n 1 $resdir/console.log`\" `date`
+ fi
fi
- if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ if test $must_continue = no && test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
then
- echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1
+ echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds `date`" >> $resdir/Warnings 2>&1
kill -KILL $qemu_pid
break
fi
@@ -172,5 +183,3 @@ fi
# Tell the script that this run is done.
rm -f $resdir/build.run
-
-parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 420ed5ce9d32..f4c8055dbf7a 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -205,6 +205,7 @@ echo "# TORTURE_KCONFIG_GDB_ARG=\"$TORTURE_KCONFIG_GDB_ARG\"" >> $resdir/qemu-cm
echo "# TORTURE_JITTER_START=\"$TORTURE_JITTER_START\"" >> $resdir/qemu-cmd
echo "# TORTURE_JITTER_STOP=\"$TORTURE_JITTER_STOP\"" >> $resdir/qemu-cmd
echo "# TORTURE_TRUST_MAKE=\"$TORTURE_TRUST_MAKE\"; export TORTURE_TRUST_MAKE" >> $resdir/qemu-cmd
+echo "# TORTURE_CPU_COUNT=$cpu_count" >> $resdir/qemu-cmd
if test -n "$TORTURE_BUILDONLY"
then
@@ -214,3 +215,4 @@ then
fi
kvm-test-1-run-qemu.sh $resdir
+parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 6bf00a003d3d..f442d84fb2a3 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -20,6 +20,9 @@ mkdir $T
cd `dirname $scriptname`/../../../../../
+# This script knows only English.
+LANG=en_US.UTF-8; export LANG
+
dur=$((30*60))
dryrun=""
KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
@@ -41,6 +44,7 @@ TORTURE_KCONFIG_KASAN_ARG=""
TORTURE_KCONFIG_KCSAN_ARG=""
TORTURE_KMAKE_ARG=""
TORTURE_QEMU_MEM=512
+TORTURE_REMOTE=
TORTURE_SHUTDOWN_GRACE=180
TORTURE_SUITE=rcu
TORTURE_MOD=rcutorture
@@ -64,7 +68,7 @@ usage () {
echo " --cpus N"
echo " --datestamp string"
echo " --defconfig string"
- echo " --dryrun batches|sched|script"
+ echo " --dryrun batches|scenarios|sched|script"
echo " --duration minutes | <seconds>s | <hours>h | <days>d"
echo " --gdb"
echo " --help"
@@ -77,6 +81,7 @@ usage () {
echo " --no-initrd"
echo " --qemu-args qemu-arguments"
echo " --qemu-cmd qemu-system-..."
+ echo " --remote"
echo " --results absolute-pathname"
echo " --torture lock|rcu|rcuscale|refscale|scf"
echo " --trust-make"
@@ -112,10 +117,13 @@ do
checkarg --cpus "(number)" "$#" "$2" '^[0-9]*$' '^--'
cpus=$2
TORTURE_ALLOTED_CPUS="$2"
- max_cpus="`identify_qemu_vcpus`"
- if test "$TORTURE_ALLOTED_CPUS" -gt "$max_cpus"
+ if test -z "$TORTURE_REMOTE"
then
- TORTURE_ALLOTED_CPUS=$max_cpus
+ max_cpus="`identify_qemu_vcpus`"
+ if test "$TORTURE_ALLOTED_CPUS" -gt "$max_cpus"
+ then
+ TORTURE_ALLOTED_CPUS=$max_cpus
+ fi
fi
shift
;;
@@ -130,7 +138,7 @@ do
shift
;;
--dryrun)
- checkarg --dryrun "batches|sched|script" $# "$2" 'batches\|sched\|script' '^--'
+ checkarg --dryrun "batches|sched|script" $# "$2" 'batches\|scenarios\|sched\|script' '^--'
dryrun=$2
shift
;;
@@ -206,6 +214,9 @@ do
TORTURE_QEMU_CMD="$2"
shift
;;
+ --remote)
+ TORTURE_REMOTE=1
+ ;;
--results)
checkarg --results "(absolute pathname)" "$#" "$2" '^/' '^error'
resdir=$2
@@ -419,17 +430,10 @@ then
git diff HEAD >> $resdir/$ds/testid.txt
fi
___EOF___
-awk < $T/cfgcpu.pack \
- -v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
- -v CONFIGDIR="$CONFIGFRAG/" \
- -v KVM="$KVM" \
- -v ncpus=$cpus \
- -v jitter="$jitter" \
- -v rd=$resdir/$ds/ \
- -v dur=$dur \
- -v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
- -v TORTURE_BOOTARGS="$TORTURE_BOOTARGS" \
-'BEGIN {
+kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk
+kvm-get-cpus-script.sh $T/cpuarray.awk $T/dumpbatches.awk
+cat << '___EOF___' >> $T/dumpbatches.awk
+BEGIN {
i = 0;
}
@@ -440,7 +444,7 @@ awk < $T/cfgcpu.pack \
}
# Dump out the scripting required to run one test batch.
-function dump(first, pastlast, batchnum)
+function dump(first, pastlast, batchnum, affinitylist)
{
print "echo ----Start batch " batchnum ": `date` | tee -a " rd "log";
print "needqemurun="
@@ -472,6 +476,14 @@ function dump(first, pastlast, batchnum)
print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log";
print "mkdir " rd cfr[jn] " || :";
print "touch " builddir ".wait";
+ affinitylist = "";
+ if (gotcpus()) {
+ affinitylist = nextcpus(cpusr[jn]);
+ }
+ if (affinitylist ~ /^[0-9,-][0-9,-]*$/)
+ print "export TORTURE_AFFINITY=" affinitylist;
+ else
+ print "export TORTURE_AFFINITY=";
print "kvm-test-1-run.sh " CONFIGDIR cf[j], rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &"
print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log";
print "while test -f " builddir ".wait"
@@ -549,21 +561,20 @@ END {
# Dump the last batch.
if (ncpus != 0)
dump(first, i, batchnum);
-}' >> $T/script
-
-cat << '___EOF___' >> $T/script
-echo | tee -a $TORTURE_RESDIR/log
-echo | tee -a $TORTURE_RESDIR/log
-echo " --- `date` Test summary:" | tee -a $TORTURE_RESDIR/log
-___EOF___
-cat << ___EOF___ >> $T/script
-echo Results directory: $resdir/$ds | tee -a $resdir/$ds/log
-kcsan-collapse.sh $resdir/$ds | tee -a $resdir/$ds/log
-kvm-recheck.sh $resdir/$ds > $T/kvm-recheck.sh.out 2>&1
+}
___EOF___
-echo 'ret=$?' >> $T/script
-echo "cat $T/kvm-recheck.sh.out | tee -a $resdir/$ds/log" >> $T/script
-echo 'exit $ret' >> $T/script
+awk < $T/cfgcpu.pack \
+ -v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
+ -v CONFIGDIR="$CONFIGFRAG/" \
+ -v KVM="$KVM" \
+ -v ncpus=$cpus \
+ -v jitter="$jitter" \
+ -v rd=$resdir/$ds/ \
+ -v dur=$dur \
+ -v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
+ -v TORTURE_BOOTARGS="$TORTURE_BOOTARGS" \
+ -f $T/dumpbatches.awk >> $T/script
+echo kvm-end-run-stats.sh "$resdir/$ds" "$starttime" >> $T/script
# Extract the tests and their batches from the script.
egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
@@ -577,6 +588,25 @@ egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
print batchno, $1, $2
}' > $T/batches
+# As above, but one line per batch.
+grep -v '^#' $T/batches | awk '
+BEGIN {
+ oldbatch = 1;
+}
+
+{
+ if (oldbatch != $1) {
+ print ++n ". " curbatch;
+ curbatch = "";
+ oldbatch = $1;
+ }
+ curbatch = curbatch " " $2;
+}
+
+END {
+ print ++n ". " curbatch;
+}' > $T/scenarios
+
if test "$dryrun" = script
then
cat $T/script
@@ -597,13 +627,17 @@ elif test "$dryrun" = batches
then
cat $T/batches
exit 0
+elif test "$dryrun" = scenarios
+then
+ cat $T/scenarios
+ exit 0
else
# Not a dryrun. Record the batches and the number of CPUs, then run the script.
bash $T/script
ret=$?
cp $T/batches $resdir/$ds/batches
+ cp $T/scenarios $resdir/$ds/scenarios
echo '#' cpus=$cpus >> $resdir/$ds/batches
- echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a $resdir/$ds/log
exit $ret
fi
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 56e2e1a42569..363f56081eff 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -53,6 +53,7 @@ do_refscale=yes
do_kvfree=yes
do_kasan=yes
do_kcsan=no
+do_clocksourcewd=yes
# doyesno - Helper function for yes/no arguments
function doyesno () {
@@ -72,6 +73,7 @@ usage () {
echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
echo " --doall"
echo " --doallmodconfig / --do-no-allmodconfig"
+ echo " --do-clocksourcewd / --do-no-clocksourcewd"
echo " --do-kasan / --do-no-kasan"
echo " --do-kcsan / --do-no-kcsan"
echo " --do-kvfree / --do-no-kvfree"
@@ -109,7 +111,7 @@ do
configs_scftorture="$configs_scftorture $2"
shift
;;
- --doall)
+ --do-all|--doall)
do_allmodconfig=yes
do_rcutorture=yes
do_locktorture=yes
@@ -119,10 +121,14 @@ do
do_kvfree=yes
do_kasan=yes
do_kcsan=yes
+ do_clocksourcewd=yes
;;
--do-allmodconfig|--do-no-allmodconfig)
do_allmodconfig=`doyesno "$1" --do-allmodconfig`
;;
+ --do-clocksourcewd|--do-no-clocksourcewd)
+ do_clocksourcewd=`doyesno "$1" --do-clocksourcewd`
+ ;;
--do-kasan|--do-no-kasan)
do_kasan=`doyesno "$1" --do-kasan`
;;
@@ -135,7 +141,7 @@ do
--do-locktorture|--do-no-locktorture)
do_locktorture=`doyesno "$1" --do-locktorture`
;;
- --do-none)
+ --do-none|--donone)
do_allmodconfig=no
do_rcutorture=no
do_locktorture=no
@@ -145,6 +151,7 @@ do
do_kvfree=no
do_kasan=no
do_kcsan=no
+ do_clocksourcewd=no
;;
--do-rcuscale|--do-no-rcuscale)
do_rcuscale=`doyesno "$1" --do-rcuscale`
@@ -279,9 +286,9 @@ function torture_one {
# torture_bootargs="[ kernel boot arguments ]"
# torture_set flavor [ kvm.sh arguments ]
#
-# Note that "flavor" is an arbitrary string. Supply --torture if needed.
-# Note that quoting is problematic. So on the command line, pass multiple
-# values with multiple kvm.sh argument instances.
+# Note that "flavor" is an arbitrary string that does not affect kvm.sh
+# in any way. So also supply --torture if you need something other than
+# the default.
function torture_set {
local cur_kcsan_kmake_args=
local kcsan_kmake_tag=
@@ -302,7 +309,7 @@ function torture_set {
kcsan_kmake_tag="--kmake-args"
cur_kcsan_kmake_args="$kcsan_kmake_args"
fi
- torture_one $* --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan
+ torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan
fi
}
@@ -377,6 +384,22 @@ then
torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
fi
+if test "$do_clocksourcewd" = "yes"
+then
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000"
+ torture_set "clocksourcewd-1" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
+
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 clocksource.max_cswd_read_retries=1"
+ torture_set "clocksourcewd-2" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
+
+ # In case our work is already done...
+ if test "$do_rcutorture" != "yes"
+ then
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000"
+ torture_set "clocksourcewd-3" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --trust-make
+ fi
+fi
+
echo " --- " $scriptname $args
echo " --- " Done `date` | tee -a $T/log
ret=0
@@ -395,6 +418,10 @@ then
nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`"
ret=2
fi
+if test "$do_kcsan" = "yes"
+then
+ TORTURE_KCONFIG_KCSAN_ARG=1 tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh tools/testing/selftests/rcutorture/res/$ds > tools/testing/selftests/rcutorture/res/$ds/kcsan.sum
+fi
echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log
echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log
tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST
new file mode 100644
index 000000000000..22d598f9cabe
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST
@@ -0,0 +1,17 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=16
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=y
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_TRACE=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot
new file mode 100644
index 000000000000..f57720c52c0f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot
@@ -0,0 +1,8 @@
+rcutorture.test_boost=2
+rcutorture.stutter=0
+rcutree.gp_preinit_delay=12
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
+rcutree.kthread_prio=2
+threadirqs
+tree.use_softirq=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
index bafe94cbd739..3ca112444ce7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=2
+CONFIG_NR_CPUS=4
CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
index bafe94cbd739..3ca112444ce7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=2
+CONFIG_NR_CPUS=4
CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
index ea4399020c6c..dc02083803ce 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=2
+CONFIG_NR_CPUS=4
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
index 721cfda76ab2..4cc1cc581321 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
@@ -7,7 +7,7 @@ CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_FAST_NO_HZ=n
-CONFIG_HOTPLUG_CPU=n
+CONFIG_HOTPLUG_CPU=y
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_NOCB_CPU=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54
index 7629f5dd73b2..f5952061fde7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54
@@ -8,7 +8,7 @@ CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_FAST_NO_HZ=n
-CONFIG_HOTPLUG_CPU=n
+CONFIG_HOTPLUG_CPU=y
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=3
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
index 1cd25b7314e3..ad505a887bec 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
@@ -7,7 +7,7 @@ CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_FAST_NO_HZ=n
-CONFIG_HOTPLUG_CPU=n
+CONFIG_HOTPLUG_CPU=y
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_NOCB_CPU=n
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT
index d10bc694f42c..4f08e641bb6b 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT
+++ b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT
@@ -7,7 +7,7 @@ CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_RCU_FAST_NO_HZ=n
-CONFIG_HOTPLUG_CPU=n
+CONFIG_HOTPLUG_CPU=y
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_NOCB_CPU=n
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
index cf6938d679d7..1e24827f96f1 100644
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
@@ -174,7 +174,7 @@ static inline bool spin_trylock(spinlock_t *lock)
}
struct completion {
- /* Hopefuly this won't overflow. */
+ /* Hopefully this won't overflow. */
unsigned int count;
};
diff --git a/tools/testing/selftests/resctrl/README b/tools/testing/selftests/resctrl/README
index 4b36b25b6ac0..3d2bbd4fa3aa 100644
--- a/tools/testing/selftests/resctrl/README
+++ b/tools/testing/selftests/resctrl/README
@@ -47,7 +47,7 @@ Parameter '-h' shows usage information.
usage: resctrl_tests [-h] [-b "benchmark_cmd [options]"] [-t test list] [-n no_of_bits]
-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CMT default benchmark is builtin fill_buf
- -t test list: run tests specified in the test list, e.g. -t mbm, mba, cmt, cat
+ -t test list: run tests specified in the test list, e.g. -t mbm,mba,cmt,cat
-n no_of_bits: run cache tests using specified no of bits in cache bit mask
-p cpu_no: specify CPU number to run the test. 1 is default
-h: help
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index f51b5fc066a3..973f09a66e1e 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -40,7 +40,7 @@ static void cmd_help(void)
printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CMT\n");
printf("\t default benchmark is builtin fill_buf\n");
printf("\t-t test list: run tests specified in the test list, ");
- printf("e.g. -t mbm, mba, cmt, cat\n");
+ printf("e.g. -t mbm,mba,cmt,cat\n");
printf("\t-n no_of_bits: run cache tests using specified no of bits in cache bit mask\n");
printf("\t-p cpu_no: specify CPU number to run the test. 1 is default\n");
printf("\t-h: help\n");
@@ -173,7 +173,7 @@ int main(int argc, char **argv)
return -1;
}
- token = strtok(NULL, ":\t");
+ token = strtok(NULL, ",");
}
break;
case 'p':
diff --git a/tools/testing/selftests/rlimits/.gitignore b/tools/testing/selftests/rlimits/.gitignore
new file mode 100644
index 000000000000..091021f255b3
--- /dev/null
+++ b/tools/testing/selftests/rlimits/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+rlimits-per-userns
diff --git a/tools/testing/selftests/rlimits/Makefile b/tools/testing/selftests/rlimits/Makefile
new file mode 100644
index 000000000000..03aadb406212
--- /dev/null
+++ b/tools/testing/selftests/rlimits/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g
+TEST_GEN_PROGS := rlimits-per-userns
+
+include ../lib.mk
diff --git a/tools/testing/selftests/rlimits/config b/tools/testing/selftests/rlimits/config
new file mode 100644
index 000000000000..416bd53ce982
--- /dev/null
+++ b/tools/testing/selftests/rlimits/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/rlimits/rlimits-per-userns.c b/tools/testing/selftests/rlimits/rlimits-per-userns.c
new file mode 100644
index 000000000000..26dc949e93ea
--- /dev/null
+++ b/tools/testing/selftests/rlimits/rlimits-per-userns.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Alexey Gladkov <gladkov.alexey@gmail.com>
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <err.h>
+
+#define NR_CHILDS 2
+
+static char *service_prog;
+static uid_t user = 60000;
+static uid_t group = 60000;
+
+static void setrlimit_nproc(rlim_t n)
+{
+ pid_t pid = getpid();
+ struct rlimit limit = {
+ .rlim_cur = n,
+ .rlim_max = n
+ };
+
+ warnx("(pid=%d): Setting RLIMIT_NPROC=%ld", pid, n);
+
+ if (setrlimit(RLIMIT_NPROC, &limit) < 0)
+ err(EXIT_FAILURE, "(pid=%d): setrlimit(RLIMIT_NPROC)", pid);
+}
+
+static pid_t fork_child(void)
+{
+ pid_t pid = fork();
+
+ if (pid < 0)
+ err(EXIT_FAILURE, "fork");
+
+ if (pid > 0)
+ return pid;
+
+ pid = getpid();
+
+ warnx("(pid=%d): New process starting ...", pid);
+
+ if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
+ err(EXIT_FAILURE, "(pid=%d): prctl(PR_SET_PDEATHSIG)", pid);
+
+ signal(SIGUSR1, SIG_DFL);
+
+ warnx("(pid=%d): Changing to uid=%d, gid=%d", pid, user, group);
+
+ if (setgid(group) < 0)
+ err(EXIT_FAILURE, "(pid=%d): setgid(%d)", pid, group);
+ if (setuid(user) < 0)
+ err(EXIT_FAILURE, "(pid=%d): setuid(%d)", pid, user);
+
+ warnx("(pid=%d): Service running ...", pid);
+
+ warnx("(pid=%d): Unshare user namespace", pid);
+ if (unshare(CLONE_NEWUSER) < 0)
+ err(EXIT_FAILURE, "unshare(CLONE_NEWUSER)");
+
+ char *const argv[] = { "service", NULL };
+ char *const envp[] = { "I_AM_SERVICE=1", NULL };
+
+ warnx("(pid=%d): Executing real service ...", pid);
+
+ execve(service_prog, argv, envp);
+ err(EXIT_FAILURE, "(pid=%d): execve", pid);
+}
+
+int main(int argc, char **argv)
+{
+ size_t i;
+ pid_t child[NR_CHILDS];
+ int wstatus[NR_CHILDS];
+ int childs = NR_CHILDS;
+ pid_t pid;
+
+ if (getenv("I_AM_SERVICE")) {
+ pause();
+ exit(EXIT_SUCCESS);
+ }
+
+ service_prog = argv[0];
+ pid = getpid();
+
+ warnx("(pid=%d) Starting testcase", pid);
+
+ /*
+ * This rlimit is not a problem for root because it can be exceeded.
+ */
+ setrlimit_nproc(1);
+
+ for (i = 0; i < NR_CHILDS; i++) {
+ child[i] = fork_child();
+ wstatus[i] = 0;
+ usleep(250000);
+ }
+
+ while (1) {
+ for (i = 0; i < NR_CHILDS; i++) {
+ if (child[i] <= 0)
+ continue;
+
+ errno = 0;
+ pid_t ret = waitpid(child[i], &wstatus[i], WNOHANG);
+
+ if (!ret || (!WIFEXITED(wstatus[i]) && !WIFSIGNALED(wstatus[i])))
+ continue;
+
+ if (ret < 0 && errno != ECHILD)
+ warn("(pid=%d): waitpid(%d)", pid, child[i]);
+
+ child[i] *= -1;
+ childs -= 1;
+ }
+
+ if (!childs)
+ break;
+
+ usleep(250000);
+
+ for (i = 0; i < NR_CHILDS; i++) {
+ if (child[i] <= 0)
+ continue;
+ kill(child[i], SIGUSR1);
+ }
+ }
+
+ for (i = 0; i < NR_CHILDS; i++) {
+ if (WIFEXITED(wstatus[i]))
+ warnx("(pid=%d): pid %d exited, status=%d",
+ pid, -child[i], WEXITSTATUS(wstatus[i]));
+ else if (WIFSIGNALED(wstatus[i]))
+ warnx("(pid=%d): pid %d killed by signal %d",
+ pid, -child[i], WTERMSIG(wstatus[i]));
+
+ if (WIFSIGNALED(wstatus[i]) && WTERMSIG(wstatus[i]) == SIGUSR1)
+ continue;
+
+ warnx("(pid=%d): Test failed", pid);
+ exit(EXIT_FAILURE);
+ }
+
+ warnx("(pid=%d): Test passed", pid);
+ exit(EXIT_SUCCESS);
+}
diff --git a/tools/testing/selftests/safesetid/safesetid-test.c b/tools/testing/selftests/safesetid/safesetid-test.c
index 0c4d50644c13..4b809c93ba36 100644
--- a/tools/testing/selftests/safesetid/safesetid-test.c
+++ b/tools/testing/selftests/safesetid/safesetid-test.c
@@ -152,7 +152,7 @@ static void write_policies(void)
fd = open(add_whitelist_policy_file, O_WRONLY);
if (fd < 0)
- die("cant open add_whitelist_policy file\n");
+ die("can't open add_whitelist_policy file\n");
written = write(fd, policy_str, strlen(policy_str));
if (written != strlen(policy_str)) {
if (written >= 0) {
diff --git a/tools/testing/selftests/sched/.gitignore b/tools/testing/selftests/sched/.gitignore
new file mode 100644
index 000000000000..6996d4654d92
--- /dev/null
+++ b/tools/testing/selftests/sched/.gitignore
@@ -0,0 +1 @@
+cs_prctl_test
diff --git a/tools/testing/selftests/sched/Makefile b/tools/testing/selftests/sched/Makefile
new file mode 100644
index 000000000000..10c72f14fea9
--- /dev/null
+++ b/tools/testing/selftests/sched/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
+CLANG_FLAGS += -no-integrated-as
+endif
+
+CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -Wl,-rpath=./ \
+ $(CLANG_FLAGS)
+LDLIBS += -lpthread
+
+TEST_GEN_FILES := cs_prctl_test
+TEST_PROGS := cs_prctl_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/sched/config b/tools/testing/selftests/sched/config
new file mode 100644
index 000000000000..e8b09aa7c0c4
--- /dev/null
+++ b/tools/testing/selftests/sched/config
@@ -0,0 +1 @@
+CONFIG_SCHED_DEBUG=y
diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c
new file mode 100644
index 000000000000..7db9cf822dc7
--- /dev/null
+++ b/tools/testing/selftests/sched/cs_prctl_test.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Use the core scheduling prctl() to test core scheduling cookies control.
+ *
+ * Copyright (c) 2021 Oracle and/or its affiliates.
+ * Author: Chris Hyser <chris.hyser@oracle.com>
+ *
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses>.
+ */
+
+#define _GNU_SOURCE
+#include <sys/eventfd.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sched.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if __GLIBC_PREREQ(2, 30) == 0
+#include <sys/syscall.h>
+static pid_t gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+#endif
+
+#ifndef PR_SCHED_CORE
+#define PR_SCHED_CORE 62
+# define PR_SCHED_CORE_GET 0
+# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */
+# define PR_SCHED_CORE_MAX 4
+#endif
+
+#define MAX_PROCESSES 128
+#define MAX_THREADS 128
+
+static const char USAGE[] = "cs_prctl_test [options]\n"
+" options:\n"
+" -P : number of processes to create.\n"
+" -T : number of threads per process to create.\n"
+" -d : delay time to keep tasks alive.\n"
+" -k : keep tasks alive until keypress.\n";
+
+enum pid_type {PIDTYPE_PID = 0, PIDTYPE_TGID, PIDTYPE_PGID};
+
+const int THREAD_CLONE_FLAGS = CLONE_THREAD | CLONE_SIGHAND | CLONE_FS | CLONE_VM | CLONE_FILES;
+
+static int _prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4,
+ unsigned long arg5)
+{
+ int res;
+
+ res = prctl(option, arg2, arg3, arg4, arg5);
+ printf("%d = prctl(%d, %ld, %ld, %ld, %lx)\n", res, option, (long)arg2, (long)arg3,
+ (long)arg4, arg5);
+ return res;
+}
+
+#define STACK_SIZE (1024 * 1024)
+
+#define handle_error(msg) __handle_error(__FILE__, __LINE__, msg)
+static void __handle_error(char *fn, int ln, char *msg)
+{
+ printf("(%s:%d) - ", fn, ln);
+ perror(msg);
+ exit(EXIT_FAILURE);
+}
+
+static void handle_usage(int rc, char *msg)
+{
+ puts(USAGE);
+ puts(msg);
+ putchar('\n');
+ exit(rc);
+}
+
+static unsigned long get_cs_cookie(int pid)
+{
+ unsigned long long cookie;
+ int ret;
+
+ ret = prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, pid, PIDTYPE_PID,
+ (unsigned long)&cookie);
+ if (ret) {
+ printf("Not a core sched system\n");
+ return -1UL;
+ }
+
+ return cookie;
+}
+
+struct child_args {
+ int num_threads;
+ int pfd[2];
+ int cpid;
+ int thr_tids[MAX_THREADS];
+};
+
+static int child_func_thread(void __attribute__((unused))*arg)
+{
+ while (1)
+ usleep(20000);
+ return 0;
+}
+
+static void create_threads(int num_threads, int thr_tids[])
+{
+ void *child_stack;
+ pid_t tid;
+ int i;
+
+ for (i = 0; i < num_threads; ++i) {
+ child_stack = malloc(STACK_SIZE);
+ if (!child_stack)
+ handle_error("child stack allocate");
+
+ tid = clone(child_func_thread, child_stack + STACK_SIZE, THREAD_CLONE_FLAGS, NULL);
+ if (tid == -1)
+ handle_error("clone thread");
+ thr_tids[i] = tid;
+ }
+}
+
+static int child_func_process(void *arg)
+{
+ struct child_args *ca = (struct child_args *)arg;
+
+ close(ca->pfd[0]);
+
+ create_threads(ca->num_threads, ca->thr_tids);
+
+ write(ca->pfd[1], &ca->thr_tids, sizeof(int) * ca->num_threads);
+ close(ca->pfd[1]);
+
+ while (1)
+ usleep(20000);
+ return 0;
+}
+
+static unsigned char child_func_process_stack[STACK_SIZE];
+
+void create_processes(int num_processes, int num_threads, struct child_args proc[])
+{
+ pid_t cpid;
+ int i;
+
+ for (i = 0; i < num_processes; ++i) {
+ proc[i].num_threads = num_threads;
+
+ if (pipe(proc[i].pfd) == -1)
+ handle_error("pipe() failed");
+
+ cpid = clone(child_func_process, child_func_process_stack + STACK_SIZE,
+ SIGCHLD, &proc[i]);
+ proc[i].cpid = cpid;
+ close(proc[i].pfd[1]);
+ }
+
+ for (i = 0; i < num_processes; ++i) {
+ read(proc[i].pfd[0], &proc[i].thr_tids, sizeof(int) * proc[i].num_threads);
+ close(proc[i].pfd[0]);
+ }
+}
+
+void disp_processes(int num_processes, struct child_args proc[])
+{
+ int i, j;
+
+ printf("tid=%d, / tgid=%d / pgid=%d: %lx\n", gettid(), getpid(), getpgid(0),
+ get_cs_cookie(getpid()));
+
+ for (i = 0; i < num_processes; ++i) {
+ printf(" tid=%d, / tgid=%d / pgid=%d: %lx\n", proc[i].cpid, proc[i].cpid,
+ getpgid(proc[i].cpid), get_cs_cookie(proc[i].cpid));
+ for (j = 0; j < proc[i].num_threads; ++j) {
+ printf(" tid=%d, / tgid=%d / pgid=%d: %lx\n", proc[i].thr_tids[j],
+ proc[i].cpid, getpgid(0), get_cs_cookie(proc[i].thr_tids[j]));
+ }
+ }
+ puts("\n");
+}
+
+static int errors;
+
+#define validate(v) _validate(__LINE__, v, #v)
+void _validate(int line, int val, char *msg)
+{
+ if (!val) {
+ ++errors;
+ printf("(%d) FAILED: %s\n", line, msg);
+ } else {
+ printf("(%d) PASSED: %s\n", line, msg);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct child_args procs[MAX_PROCESSES];
+
+ int keypress = 0;
+ int num_processes = 2;
+ int num_threads = 3;
+ int delay = 0;
+ int res = 0;
+ int pidx;
+ int pid;
+ int opt;
+
+ while ((opt = getopt(argc, argv, ":hkT:P:d:")) != -1) {
+ switch (opt) {
+ case 'P':
+ num_processes = (int)strtol(optarg, NULL, 10);
+ break;
+ case 'T':
+ num_threads = (int)strtoul(optarg, NULL, 10);
+ break;
+ case 'd':
+ delay = (int)strtol(optarg, NULL, 10);
+ break;
+ case 'k':
+ keypress = 1;
+ break;
+ case 'h':
+ printf(USAGE);
+ exit(EXIT_SUCCESS);
+ default:
+ handle_usage(20, "unknown option");
+ }
+ }
+
+ if (num_processes < 1 || num_processes > MAX_PROCESSES)
+ handle_usage(1, "Bad processes value");
+
+ if (num_threads < 1 || num_threads > MAX_THREADS)
+ handle_usage(2, "Bad thread value");
+
+ if (keypress)
+ delay = -1;
+
+ srand(time(NULL));
+
+ /* put into separate process group */
+ if (setpgid(0, 0) != 0)
+ handle_error("process group");
+
+ printf("\n## Create a thread/process/process group hiearchy\n");
+ create_processes(num_processes, num_threads, procs);
+ disp_processes(num_processes, procs);
+ validate(get_cs_cookie(0) == 0);
+
+ printf("\n## Set a cookie on entire process group\n");
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, 0, PIDTYPE_PGID, 0) < 0)
+ handle_error("core_sched create failed -- PGID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) != 0);
+
+ /* get a random process pid */
+ pidx = rand() % num_processes;
+ pid = procs[pidx].cpid;
+
+ validate(get_cs_cookie(0) == get_cs_cookie(pid));
+ validate(get_cs_cookie(0) == get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Set a new cookie on entire process/TGID [%d]\n", pid);
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid, PIDTYPE_TGID, 0) < 0)
+ handle_error("core_sched create failed -- TGID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) != get_cs_cookie(pid));
+ validate(get_cs_cookie(pid) != 0);
+ validate(get_cs_cookie(pid) == get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Copy the cookie of current/PGID[%d], to pid [%d] as PIDTYPE_PID\n",
+ getpid(), pid);
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, pid, PIDTYPE_PID, 0) < 0)
+ handle_error("core_sched share to itself failed -- PID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) == get_cs_cookie(pid));
+ validate(get_cs_cookie(pid) != 0);
+ validate(get_cs_cookie(pid) != get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Copy cookie from a thread [%d] to current/PGID [%d] as PIDTYPE_PID\n",
+ procs[pidx].thr_tids[0], getpid());
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, procs[pidx].thr_tids[0],
+ PIDTYPE_PID, 0) < 0)
+ handle_error("core_sched share from thread failed -- PID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) == get_cs_cookie(procs[pidx].thr_tids[0]));
+ validate(get_cs_cookie(pid) != get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Copy cookie from current [%d] to current as pidtype PGID\n", getpid());
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, 0, PIDTYPE_PGID, 0) < 0)
+ handle_error("core_sched share to self failed -- PGID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) == get_cs_cookie(pid));
+ validate(get_cs_cookie(pid) != 0);
+ validate(get_cs_cookie(pid) == get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ if (errors) {
+ printf("TESTS FAILED. errors: %d\n", errors);
+ res = 10;
+ } else {
+ printf("SUCCESS !!!\n");
+ }
+
+ if (keypress)
+ getchar();
+ else
+ sleep(delay);
+
+ for (pidx = 0; pidx < num_processes; ++pidx)
+ kill(procs[pidx].cpid, 15);
+
+ return res;
+}
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index fcc806585266..6e5102a7d7c9 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -143,9 +143,15 @@ int main(int argc, char *argv[])
unsigned long long native, filter1, filter2, bitmap1, bitmap2;
unsigned long long entry, per_filter1, per_filter2;
+ setbuf(stdout, NULL);
+
+ printf("Running on:\n");
+ system("uname -a");
+
printf("Current BPF sysctl settings:\n");
- system("sysctl net.core.bpf_jit_enable");
- system("sysctl net.core.bpf_jit_harden");
+ /* Avoid using "sysctl" which may not be installed. */
+ system("grep -H . /proc/sys/net/core/bpf_jit_enable");
+ system("grep -H . /proc/sys/net/core/bpf_jit_harden");
if (argc > 1)
samples = strtoull(argv[1], NULL, 0);
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index e3d5c77a8612..1d64891e6492 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -235,6 +235,10 @@ struct seccomp_notif_addfd {
};
#endif
+#ifndef SECCOMP_ADDFD_FLAG_SEND
+#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
+#endif
+
struct seccomp_notif_addfd_small {
__u64 id;
char weird[4];
@@ -3959,7 +3963,7 @@ TEST(user_notification_addfd)
{
pid_t pid;
long ret;
- int status, listener, memfd, fd;
+ int status, listener, memfd, fd, nextfd;
struct seccomp_notif_addfd addfd = {};
struct seccomp_notif_addfd_small small = {};
struct seccomp_notif_addfd_big big = {};
@@ -3968,25 +3972,34 @@ TEST(user_notification_addfd)
/* 100 ms */
struct timespec delay = { .tv_nsec = 100000000 };
+ /* There may be arbitrary already-open fds at test start. */
memfd = memfd_create("test", 0);
ASSERT_GE(memfd, 0);
+ nextfd = memfd + 1;
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret) {
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
+ /* fd: 4 */
/* Check that the basic notification machinery works */
listener = user_notif_syscall(__NR_getppid,
SECCOMP_FILTER_FLAG_NEW_LISTENER);
- ASSERT_GE(listener, 0);
+ ASSERT_EQ(listener, nextfd++);
pid = fork();
ASSERT_GE(pid, 0);
if (pid == 0) {
+ /* fds will be added and this value is expected */
if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
exit(1);
+
+ /* Atomic addfd+send is received here. Check it is a valid fd */
+ if (fcntl(syscall(__NR_getppid), F_GETFD) == -1)
+ exit(1);
+
exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
}
@@ -4028,14 +4041,14 @@ TEST(user_notification_addfd)
/* Verify we can set an arbitrary remote fd */
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
- EXPECT_GE(fd, 0);
+ EXPECT_EQ(fd, nextfd++);
EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
/* Verify we can set an arbitrary remote fd with large size */
memset(&big, 0x0, sizeof(big));
big.addfd = addfd;
fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
- EXPECT_GE(fd, 0);
+ EXPECT_EQ(fd, nextfd++);
/* Verify we can set a specific remote fd */
addfd.newfd = 42;
@@ -4065,6 +4078,32 @@ TEST(user_notification_addfd)
ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
ASSERT_EQ(addfd.id, req.id);
+ /* Verify we can do an atomic addfd and send */
+ addfd.newfd = 0;
+ addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
+ /*
+ * Child has earlier "low" fds and now 42, so we expect the next
+ * lowest available fd to be assigned here.
+ */
+ EXPECT_EQ(fd, nextfd++);
+ EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
+
+ /*
+ * This sets the ID of the ADD FD to the last request plus 1. The
+ * notification ID increments 1 per notification.
+ */
+ addfd.id = req.id + 1;
+
+ /* This spins until the underlying notification is generated */
+ while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
+ errno != -EINPROGRESS)
+ nanosleep(&delay, NULL);
+
+ memset(&req, 0, sizeof(req));
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ ASSERT_EQ(addfd.id, req.id);
+
resp.id = req.id;
resp.error = 0;
resp.val = USER_NOTIF_MAGIC;
@@ -4125,6 +4164,10 @@ TEST(user_notification_addfd_rlimit)
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
EXPECT_EQ(errno, EMFILE);
+ addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EMFILE);
+
addfd.newfd = 100;
addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
diff --git a/tools/testing/selftests/sgx/call.S b/tools/testing/selftests/sgx/call.S
index 4ecadc7490f4..b09a25890f3b 100644
--- a/tools/testing/selftests/sgx/call.S
+++ b/tools/testing/selftests/sgx/call.S
@@ -5,8 +5,8 @@
.text
- .global sgx_call_vdso
-sgx_call_vdso:
+ .global sgx_enter_enclave
+sgx_enter_enclave:
.cfi_startproc
push %r15
.cfi_adjust_cfa_offset 8
@@ -27,7 +27,7 @@ sgx_call_vdso:
.cfi_adjust_cfa_offset 8
push 0x38(%rsp)
.cfi_adjust_cfa_offset 8
- call *eenter(%rip)
+ call *vdso_sgx_enter_enclave(%rip)
add $0x10, %rsp
.cfi_adjust_cfa_offset -0x10
pop %rbx
diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h
index 0bd73428d2f3..f88562afcaa0 100644
--- a/tools/testing/selftests/sgx/defines.h
+++ b/tools/testing/selftests/sgx/defines.h
@@ -18,4 +18,14 @@
#include "../../../../arch/x86/include/asm/enclu.h"
#include "../../../../arch/x86/include/uapi/asm/sgx.h"
+enum encl_op_type {
+ ENCL_OP_PUT,
+ ENCL_OP_GET,
+};
+
+struct encl_op {
+ uint64_t type;
+ uint64_t buffer;
+};
+
#endif /* DEFINES_H */
diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
index f441ac34b4d4..3ebe5d1fe337 100644
--- a/tools/testing/selftests/sgx/load.c
+++ b/tools/testing/selftests/sgx/load.c
@@ -150,16 +150,6 @@ bool encl_load(const char *path, struct encl *encl)
goto err;
}
- /*
- * This just checks if the /dev file has these permission
- * bits set. It does not check that the current user is
- * the owner or in the owning group.
- */
- if (!(sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
- fprintf(stderr, "no execute permissions on device file %s\n", device_path);
- goto err;
- }
-
ptr = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
if (ptr == (void *)-1) {
perror("mmap for read");
@@ -169,13 +159,13 @@ bool encl_load(const char *path, struct encl *encl)
#define ERR_MSG \
"mmap() succeeded for PROT_READ, but failed for PROT_EXEC.\n" \
-" Check that current user has execute permissions on %s and \n" \
-" that /dev does not have noexec set: mount | grep \"/dev .*noexec\"\n" \
+" Check that /dev does not have noexec set:\n" \
+" \tmount | grep \"/dev .*noexec\"\n" \
" If so, remount it executable: mount -o remount,exec /dev\n\n"
ptr = mmap(NULL, PAGE_SIZE, PROT_EXEC, MAP_SHARED, fd, 0);
if (ptr == (void *)-1) {
- fprintf(stderr, ERR_MSG, device_path);
+ fprintf(stderr, ERR_MSG);
goto err;
}
munmap(ptr, PAGE_SIZE);
@@ -239,9 +229,6 @@ bool encl_load(const char *path, struct encl *encl)
seg->offset = (phdr->p_offset & PAGE_MASK) - src_offset;
seg->size = (phdr->p_filesz + PAGE_SIZE - 1) & PAGE_MASK;
- printf("0x%016lx 0x%016lx 0x%02x\n", seg->offset, seg->size,
- seg->prot);
-
j++;
}
diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
index d304a4044eb9..e252015e0c15 100644
--- a/tools/testing/selftests/sgx/main.c
+++ b/tools/testing/selftests/sgx/main.c
@@ -17,11 +17,11 @@
#include <sys/types.h>
#include <sys/auxv.h>
#include "defines.h"
+#include "../kselftest_harness.h"
#include "main.h"
-#include "../kselftest.h"
static const uint64_t MAGIC = 0x1122334455667788ULL;
-vdso_sgx_enter_enclave_t eenter;
+vdso_sgx_enter_enclave_t vdso_sgx_enter_enclave;
struct vdso_symtab {
Elf64_Sym *elf_symtab;
@@ -107,85 +107,51 @@ static Elf64_Sym *vdso_symtab_get(struct vdso_symtab *symtab, const char *name)
return NULL;
}
-bool report_results(struct sgx_enclave_run *run, int ret, uint64_t result,
- const char *test)
-{
- bool valid = true;
-
- if (ret) {
- printf("FAIL: %s() returned: %d\n", test, ret);
- valid = false;
- }
-
- if (run->function != EEXIT) {
- printf("FAIL: %s() function, expected: %u, got: %u\n", test, EEXIT,
- run->function);
- valid = false;
- }
-
- if (result != MAGIC) {
- printf("FAIL: %s(), expected: 0x%lx, got: 0x%lx\n", test, MAGIC,
- result);
- valid = false;
- }
-
- if (run->user_data) {
- printf("FAIL: %s() user data, expected: 0x0, got: 0x%llx\n",
- test, run->user_data);
- valid = false;
- }
-
- return valid;
-}
-
-static int user_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r9,
- struct sgx_enclave_run *run)
-{
- run->user_data = 0;
- return 0;
-}
+FIXTURE(enclave) {
+ struct encl encl;
+ struct sgx_enclave_run run;
+};
-int main(int argc, char *argv[])
+FIXTURE_SETUP(enclave)
{
- struct sgx_enclave_run run;
+ Elf64_Sym *sgx_enter_enclave_sym = NULL;
struct vdso_symtab symtab;
- Elf64_Sym *eenter_sym;
- uint64_t result = 0;
- struct encl encl;
+ struct encl_segment *seg;
+ char maps_line[256];
+ FILE *maps_file;
unsigned int i;
void *addr;
- int ret;
- memset(&run, 0, sizeof(run));
-
- if (!encl_load("test_encl.elf", &encl)) {
- encl_delete(&encl);
+ if (!encl_load("test_encl.elf", &self->encl)) {
+ encl_delete(&self->encl);
ksft_exit_skip("cannot load enclaves\n");
}
- if (!encl_measure(&encl))
+ for (i = 0; i < self->encl.nr_segments; i++) {
+ seg = &self->encl.segment_tbl[i];
+
+ TH_LOG("0x%016lx 0x%016lx 0x%02x", seg->offset, seg->size, seg->prot);
+ }
+
+ if (!encl_measure(&self->encl))
goto err;
- if (!encl_build(&encl))
+ if (!encl_build(&self->encl))
goto err;
/*
* An enclave consumer only must do this.
*/
- for (i = 0; i < encl.nr_segments; i++) {
- struct encl_segment *seg = &encl.segment_tbl[i];
-
- addr = mmap((void *)encl.encl_base + seg->offset, seg->size,
- seg->prot, MAP_SHARED | MAP_FIXED, encl.fd, 0);
- if (addr == MAP_FAILED) {
- perror("mmap() segment failed");
- exit(KSFT_FAIL);
- }
+ for (i = 0; i < self->encl.nr_segments; i++) {
+ struct encl_segment *seg = &self->encl.segment_tbl[i];
+
+ addr = mmap((void *)self->encl.encl_base + seg->offset, seg->size,
+ seg->prot, MAP_SHARED | MAP_FIXED, self->encl.fd, 0);
+ EXPECT_NE(addr, MAP_FAILED);
+ if (addr == MAP_FAILED)
+ goto err;
}
- memset(&run, 0, sizeof(run));
- run.tcs = encl.encl_base;
-
/* Get vDSO base address */
addr = (void *)getauxval(AT_SYSINFO_EHDR);
if (!addr)
@@ -194,37 +160,134 @@ int main(int argc, char *argv[])
if (!vdso_get_symtab(addr, &symtab))
goto err;
- eenter_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave");
- if (!eenter_sym)
+ sgx_enter_enclave_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave");
+ if (!sgx_enter_enclave_sym)
goto err;
- eenter = addr + eenter_sym->st_value;
-
- ret = sgx_call_vdso((void *)&MAGIC, &result, 0, EENTER, NULL, NULL, &run);
- if (!report_results(&run, ret, result, "sgx_call_vdso"))
- goto err;
+ vdso_sgx_enter_enclave = addr + sgx_enter_enclave_sym->st_value;
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
- /* Invoke the vDSO directly. */
- result = 0;
- ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER,
- 0, 0, &run);
- if (!report_results(&run, ret, result, "eenter"))
- goto err;
+ maps_file = fopen("/proc/self/maps", "r");
+ if (maps_file != NULL) {
+ while (fgets(maps_line, sizeof(maps_line), maps_file) != NULL) {
+ maps_line[strlen(maps_line) - 1] = '\0';
- /* And with an exit handler. */
- run.user_handler = (__u64)user_handler;
- run.user_data = 0xdeadbeef;
- ret = eenter((unsigned long)&MAGIC, (unsigned long)&result, 0, EENTER,
- 0, 0, &run);
- if (!report_results(&run, ret, result, "user_handler"))
- goto err;
+ if (strstr(maps_line, "/dev/sgx_enclave"))
+ TH_LOG("%s", maps_line);
+ }
- printf("SUCCESS\n");
- encl_delete(&encl);
- exit(KSFT_PASS);
+ fclose(maps_file);
+ }
err:
- encl_delete(&encl);
- exit(KSFT_FAIL);
+ if (!sgx_enter_enclave_sym)
+ encl_delete(&self->encl);
+
+ ASSERT_NE(sgx_enter_enclave_sym, NULL);
+}
+
+FIXTURE_TEARDOWN(enclave)
+{
+ encl_delete(&self->encl);
+}
+
+#define ENCL_CALL(op, run, clobbered) \
+ ({ \
+ int ret; \
+ if ((clobbered)) \
+ ret = vdso_sgx_enter_enclave((unsigned long)(op), 0, 0, \
+ EENTER, 0, 0, (run)); \
+ else \
+ ret = sgx_enter_enclave((void *)(op), NULL, 0, EENTER, NULL, NULL, \
+ (run)); \
+ ret; \
+ })
+
+#define EXPECT_EEXIT(run) \
+ do { \
+ EXPECT_EQ((run)->function, EEXIT); \
+ if ((run)->function != EEXIT) \
+ TH_LOG("0x%02x 0x%02x 0x%016llx", (run)->exception_vector, \
+ (run)->exception_error_code, (run)->exception_addr); \
+ } while (0)
+
+TEST_F(enclave, unclobbered_vdso)
+{
+ struct encl_op op;
+
+ op.type = ENCL_OP_PUT;
+ op.buffer = MAGIC;
+
+ EXPECT_EQ(ENCL_CALL(&op, &self->run, false), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+
+ op.type = ENCL_OP_GET;
+ op.buffer = 0;
+
+ EXPECT_EQ(ENCL_CALL(&op, &self->run, false), 0);
+
+ EXPECT_EQ(op.buffer, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+}
+
+TEST_F(enclave, clobbered_vdso)
+{
+ struct encl_op op;
+
+ op.type = ENCL_OP_PUT;
+ op.buffer = MAGIC;
+
+ EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+
+ op.type = ENCL_OP_GET;
+ op.buffer = 0;
+
+ EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0);
+
+ EXPECT_EQ(op.buffer, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
}
+
+static int test_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r9,
+ struct sgx_enclave_run *run)
+{
+ run->user_data = 0;
+
+ return 0;
+}
+
+TEST_F(enclave, clobbered_vdso_and_user_function)
+{
+ struct encl_op op;
+
+ self->run.user_handler = (__u64)test_handler;
+ self->run.user_data = 0xdeadbeef;
+
+ op.type = ENCL_OP_PUT;
+ op.buffer = MAGIC;
+
+ EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+
+ op.type = ENCL_OP_GET;
+ op.buffer = 0;
+
+ EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0);
+
+ EXPECT_EQ(op.buffer, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h
index 67211a708f04..68672fd86cf9 100644
--- a/tools/testing/selftests/sgx/main.h
+++ b/tools/testing/selftests/sgx/main.h
@@ -35,7 +35,7 @@ bool encl_load(const char *path, struct encl *encl);
bool encl_measure(struct encl *encl);
bool encl_build(struct encl *encl);
-int sgx_call_vdso(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9,
- struct sgx_enclave_run *run);
+int sgx_enter_enclave(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9,
+ struct sgx_enclave_run *run);
#endif /* MAIN_H */
diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c
index dee7a3d6c5a5..92bbc5a15c39 100644
--- a/tools/testing/selftests/sgx/sigstruct.c
+++ b/tools/testing/selftests/sgx/sigstruct.c
@@ -55,10 +55,27 @@ static bool alloc_q1q2_ctx(const uint8_t *s, const uint8_t *m,
return true;
}
+static void reverse_bytes(void *data, int length)
+{
+ int i = 0;
+ int j = length - 1;
+ uint8_t temp;
+ uint8_t *ptr = data;
+
+ while (i < j) {
+ temp = ptr[i];
+ ptr[i] = ptr[j];
+ ptr[j] = temp;
+ i++;
+ j--;
+ }
+}
+
static bool calc_q1q2(const uint8_t *s, const uint8_t *m, uint8_t *q1,
uint8_t *q2)
{
struct q1q2_ctx ctx;
+ int len;
if (!alloc_q1q2_ctx(s, m, &ctx)) {
fprintf(stderr, "Not enough memory for Q1Q2 calculation\n");
@@ -89,8 +106,10 @@ static bool calc_q1q2(const uint8_t *s, const uint8_t *m, uint8_t *q1,
goto out;
}
- BN_bn2bin(ctx.q1, q1);
- BN_bn2bin(ctx.q2, q2);
+ len = BN_bn2bin(ctx.q1, q1);
+ reverse_bytes(q1, len);
+ len = BN_bn2bin(ctx.q2, q2);
+ reverse_bytes(q2, len);
free_q1q2_ctx(&ctx);
return true;
@@ -152,22 +171,6 @@ static RSA *gen_sign_key(void)
return key;
}
-static void reverse_bytes(void *data, int length)
-{
- int i = 0;
- int j = length - 1;
- uint8_t temp;
- uint8_t *ptr = data;
-
- while (i < j) {
- temp = ptr[i];
- ptr[i] = ptr[j];
- ptr[j] = temp;
- i++;
- j--;
- }
-}
-
enum mrtags {
MRECREATE = 0x0045544145524345,
MREADD = 0x0000000044444145,
@@ -367,8 +370,6 @@ bool encl_measure(struct encl *encl)
/* BE -> LE */
reverse_bytes(sigstruct->signature, SGX_MODULUS_SIZE);
reverse_bytes(sigstruct->modulus, SGX_MODULUS_SIZE);
- reverse_bytes(sigstruct->q1, SGX_MODULUS_SIZE);
- reverse_bytes(sigstruct->q2, SGX_MODULUS_SIZE);
EVP_MD_CTX_destroy(ctx);
RSA_free(key);
diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c
index cf25b5dc1e03..734ea52f9924 100644
--- a/tools/testing/selftests/sgx/test_encl.c
+++ b/tools/testing/selftests/sgx/test_encl.c
@@ -4,6 +4,8 @@
#include <stddef.h>
#include "defines.h"
+static uint8_t encl_buffer[8192] = { 1 };
+
static void *memcpy(void *dest, const void *src, size_t n)
{
size_t i;
@@ -14,7 +16,20 @@ static void *memcpy(void *dest, const void *src, size_t n)
return dest;
}
-void encl_body(void *rdi, void *rsi)
+void encl_body(void *rdi, void *rsi)
{
- memcpy(rsi, rdi, 8);
+ struct encl_op *op = (struct encl_op *)rdi;
+
+ switch (op->type) {
+ case ENCL_OP_PUT:
+ memcpy(&encl_buffer[0], &op->buffer, 8);
+ break;
+
+ case ENCL_OP_GET:
+ memcpy(&op->buffer, &encl_buffer[0], 8);
+ break;
+
+ default:
+ break;
+ }
}
diff --git a/tools/testing/selftests/sgx/test_encl.lds b/tools/testing/selftests/sgx/test_encl.lds
index 0fbbda7e665e..a1ec64f7d91f 100644
--- a/tools/testing/selftests/sgx/test_encl.lds
+++ b/tools/testing/selftests/sgx/test_encl.lds
@@ -18,9 +18,10 @@ SECTIONS
.text : {
*(.text*)
*(.rodata*)
+ FILL(0xDEADBEEF);
+ . = ALIGN(4096);
} : text
- . = ALIGN(4096);
.data : {
*(.data*)
} : data
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
index 8934a3766d20..c53b070755b6 100644
--- a/tools/testing/selftests/sigaltstack/sas.c
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -17,6 +17,7 @@
#include <string.h>
#include <assert.h>
#include <errno.h>
+#include <sys/auxv.h>
#include "../kselftest.h"
@@ -24,6 +25,11 @@
#define SS_AUTODISARM (1U << 31)
#endif
+#ifndef AT_MINSIGSTKSZ
+#define AT_MINSIGSTKSZ 51
+#endif
+
+static unsigned int stack_size;
static void *sstack, *ustack;
static ucontext_t uc, sc;
static const char *msg = "[OK]\tStack preserved";
@@ -47,7 +53,7 @@ void my_usr1(int sig, siginfo_t *si, void *u)
#endif
if (sp < (unsigned long)sstack ||
- sp >= (unsigned long)sstack + SIGSTKSZ) {
+ sp >= (unsigned long)sstack + stack_size) {
ksft_exit_fail_msg("SP is not on sigaltstack\n");
}
/* put some data on stack. other sighandler will try to overwrite it */
@@ -108,6 +114,10 @@ int main(void)
stack_t stk;
int err;
+ /* Make sure more than the required minimum. */
+ stack_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ;
+ ksft_print_msg("[NOTE]\tthe stack size is %lu\n", stack_size);
+
ksft_print_header();
ksft_set_plan(3);
@@ -117,7 +127,7 @@ int main(void)
sigaction(SIGUSR1, &act, NULL);
act.sa_sigaction = my_usr2;
sigaction(SIGUSR2, &act, NULL);
- sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+ sstack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
if (sstack == MAP_FAILED) {
ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
@@ -139,7 +149,7 @@ int main(void)
}
stk.ss_sp = sstack;
- stk.ss_size = SIGSTKSZ;
+ stk.ss_size = stack_size;
stk.ss_flags = SS_ONSTACK | SS_AUTODISARM;
err = sigaltstack(&stk, NULL);
if (err) {
@@ -161,7 +171,7 @@ int main(void)
}
}
- ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+ ustack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
if (ustack == MAP_FAILED) {
ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
@@ -170,7 +180,7 @@ int main(void)
getcontext(&uc);
uc.uc_link = NULL;
uc.uc_stack.ss_sp = ustack;
- uc.uc_stack.ss_size = SIGSTKSZ;
+ uc.uc_stack.ss_size = stack_size;
makecontext(&uc, switch_fn, 0);
raise(SIGUSR1);
diff --git a/tools/testing/selftests/splice/short_splice_read.sh b/tools/testing/selftests/splice/short_splice_read.sh
index 7810d3589d9a..22b6c8910b18 100755
--- a/tools/testing/selftests/splice/short_splice_read.sh
+++ b/tools/testing/selftests/splice/short_splice_read.sh
@@ -1,21 +1,87 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
+#
+# Test for mishandling of splice() on pseudofilesystems, which should catch
+# bugs like 11990a5bd7e5 ("module: Correctly truncate sysfs sections output")
+#
+# Since splice fallback was removed as part of the set_fs() rework, many of these
+# tests expect to fail now. See https://lore.kernel.org/lkml/202009181443.C2179FB@keescook/
set -e
+DIR=$(dirname "$0")
+
ret=0
+expect_success()
+{
+ title="$1"
+ shift
+
+ echo "" >&2
+ echo "$title ..." >&2
+
+ set +e
+ "$@"
+ rc=$?
+ set -e
+
+ case "$rc" in
+ 0)
+ echo "ok: $title succeeded" >&2
+ ;;
+ 1)
+ echo "FAIL: $title should work" >&2
+ ret=$(( ret + 1 ))
+ ;;
+ *)
+ echo "FAIL: something else went wrong" >&2
+ ret=$(( ret + 1 ))
+ ;;
+ esac
+}
+
+expect_failure()
+{
+ title="$1"
+ shift
+
+ echo "" >&2
+ echo "$title ..." >&2
+
+ set +e
+ "$@"
+ rc=$?
+ set -e
+
+ case "$rc" in
+ 0)
+ echo "FAIL: $title unexpectedly worked" >&2
+ ret=$(( ret + 1 ))
+ ;;
+ 1)
+ echo "ok: $title correctly failed" >&2
+ ;;
+ *)
+ echo "FAIL: something else went wrong" >&2
+ ret=$(( ret + 1 ))
+ ;;
+ esac
+}
+
do_splice()
{
filename="$1"
bytes="$2"
expected="$3"
+ report="$4"
- out=$(./splice_read "$filename" "$bytes" | cat)
+ out=$("$DIR"/splice_read "$filename" "$bytes" | cat)
if [ "$out" = "$expected" ] ; then
- echo "ok: $filename $bytes"
+ echo " matched $report" >&2
+ return 0
else
- echo "FAIL: $filename $bytes"
- ret=1
+ echo " no match: '$out' vs $report" >&2
+ return 1
fi
}
@@ -23,34 +89,45 @@ test_splice()
{
filename="$1"
+ echo " checking $filename ..." >&2
+
full=$(cat "$filename")
+ rc=$?
+ if [ $rc -ne 0 ] ; then
+ return 2
+ fi
+
two=$(echo "$full" | grep -m1 . | cut -c-2)
# Make sure full splice has the same contents as a standard read.
- do_splice "$filename" 4096 "$full"
+ echo " splicing 4096 bytes ..." >&2
+ if ! do_splice "$filename" 4096 "$full" "full read" ; then
+ return 1
+ fi
# Make sure a partial splice see the first two characters.
- do_splice "$filename" 2 "$two"
+ echo " splicing 2 bytes ..." >&2
+ if ! do_splice "$filename" 2 "$two" "'$two'" ; then
+ return 1
+ fi
+
+ return 0
}
-# proc_single_open(), seq_read()
-test_splice /proc/$$/limits
-# special open, seq_read()
-test_splice /proc/$$/comm
+### /proc/$pid/ has no splice interface; these should all fail.
+expect_failure "proc_single_open(), seq_read() splice" test_splice /proc/$$/limits
+expect_failure "special open(), seq_read() splice" test_splice /proc/$$/comm
-# proc_handler, proc_dointvec_minmax
-test_splice /proc/sys/fs/nr_open
-# proc_handler, proc_dostring
-test_splice /proc/sys/kernel/modprobe
-# proc_handler, special read
-test_splice /proc/sys/kernel/version
+### /proc/sys/ has a splice interface; these should all succeed.
+expect_success "proc_handler: proc_dointvec_minmax() splice" test_splice /proc/sys/fs/nr_open
+expect_success "proc_handler: proc_dostring() splice" test_splice /proc/sys/kernel/modprobe
+expect_success "proc_handler: special read splice" test_splice /proc/sys/kernel/version
+### /sys/ has no splice interface; these should all fail.
if ! [ -d /sys/module/test_module/sections ] ; then
- modprobe test_module
+ expect_success "test_module kernel module load" modprobe test_module
fi
-# kernfs, attr
-test_splice /sys/module/test_module/coresize
-# kernfs, binattr
-test_splice /sys/module/test_module/sections/.init.text
+expect_failure "kernfs attr splice" test_splice /sys/module/test_module/coresize
+expect_failure "kernfs binattr splice" test_splice /sys/module/test_module/sections/.init.text
exit $ret
diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config
index 1ab7e8130db2..47ff5afc3727 100644
--- a/tools/testing/selftests/sync/config
+++ b/tools/testing/selftests/sync/config
@@ -1,4 +1,3 @@
CONFIG_STAGING=y
CONFIG_ANDROID=y
-CONFIG_SYNC=y
CONFIG_SW_SYNC=y
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
index 229ee185b27e..254136e3da5a 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
@@ -29,22 +29,26 @@ class SubPlugin(TdcPlugin):
return
# Check for required fields
- scapyinfo = self.args.caseinfo['scapy']
- scapy_keys = ['iface', 'count', 'packet']
- missing_keys = []
- keyfail = False
- for k in scapy_keys:
- if k not in scapyinfo:
- keyfail = True
- missing_keys.add(k)
- if keyfail:
- print('{}: Scapy block present in the test, but is missing info:'
- .format(self.sub_class))
- print('{}'.format(missing_keys))
-
- pkt = eval(scapyinfo['packet'])
- if '$' in scapyinfo['iface']:
- tpl = Template(scapyinfo['iface'])
- scapyinfo['iface'] = tpl.safe_substitute(NAMES)
- for count in range(scapyinfo['count']):
- sendp(pkt, iface=scapyinfo['iface'])
+ lscapyinfo = self.args.caseinfo['scapy']
+ if type(lscapyinfo) != list:
+ lscapyinfo = [ lscapyinfo, ]
+
+ for scapyinfo in lscapyinfo:
+ scapy_keys = ['iface', 'count', 'packet']
+ missing_keys = []
+ keyfail = False
+ for k in scapy_keys:
+ if k not in scapyinfo:
+ keyfail = True
+ missing_keys.append(k)
+ if keyfail:
+ print('{}: Scapy block present in the test, but is missing info:'
+ .format(self.sub_class))
+ print('{}'.format(missing_keys))
+
+ pkt = eval(scapyinfo['packet'])
+ if '$' in scapyinfo['iface']:
+ tpl = Template(scapyinfo['iface'])
+ scapyinfo['iface'] = tpl.safe_substitute(NAMES)
+ for count in range(scapyinfo['count']):
+ sendp(pkt, iface=scapyinfo['iface'])
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
index 4202e95e27b9..bd843ab00a58 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
@@ -406,5 +406,50 @@
"teardown": [
"$TC actions flush action ct"
]
+ },
+ {
+ "id": "3992",
+ "name": "Add ct action triggering DNAT tuple conflict",
+ "category": [
+ "actions",
+ "ct",
+ "scapy"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin",
+ "scapyPlugin"
+ ]
+ },
+ "setup": [
+ [
+ "$TC qdisc del dev $DEV1 ingress",
+ 0,
+ 1,
+ 2,
+ 255
+ ],
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 1 flower ct_state -trk action ct commit nat dst addr 20.0.0.1 port 10 pipe action drop",
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 1,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.10')/TCP(sport=5000,dport=10)"
+ },
+ {
+ "iface": "$DEV0",
+ "count": 1,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "expExitCode": "0",
+ "verifyCmd": "cat /proc/net/nf_conntrack",
+ "matchPattern": "dst=10.0.0.20",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
index 6eb4c4f97060..742f2290973e 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
@@ -417,5 +417,29 @@
"teardown": [
"$TC actions flush action skbmod"
]
+ },
+ {
+ "id": "fe09",
+ "name": "Add skbmod action to mark ECN bits",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod ecn",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 1",
+ "matchPattern": "action order [0-9]*: skbmod pipe ecn",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
index 41d783254b08..2aad4caa8581 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -446,6 +446,30 @@
"teardown": []
},
{
+ "id": "ba5b",
+ "name": "Add vlan modify action for protocol 802.1Q setting priority 0",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 priority 0 index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 100",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 100 priority 0 protocol 802.1Q pipe.*index 100 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
"id": "6812",
"name": "Add vlan modify action for protocol 802.1Q",
"category": [
@@ -463,7 +487,7 @@
"cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 index 100",
"expExitCode": "0",
"verifyCmd": "$TC actions get action vlan index 100",
- "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q priority 0 pipe.*index 100 ref",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q pipe.*index 100 ref",
"matchCount": "0",
"teardown": [
"$TC actions flush action vlan"
@@ -487,7 +511,7 @@
"cmdUnderTest": "$TC actions add action vlan modify protocol 802.1ad id 500 reclassify index 12",
"expExitCode": "0",
"verifyCmd": "$TC actions get action vlan index 12",
- "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad priority 0 reclassify.*index 12 ref",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad reclassify.*index 12 ref",
"matchCount": "1",
"teardown": [
"$TC actions flush action vlan"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
new file mode 100644
index 000000000000..88a20c781e49
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
@@ -0,0 +1,137 @@
+[
+ {
+ "id": "ce7d",
+ "name": "Add mq Qdisc to multi-queue device (4 queues)",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "4",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "2f82",
+ "name": "Add mq Qdisc to multi-queue device (256 queues)",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 256\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "256",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "c525",
+ "name": "Add duplicate mq Qdisc",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: mq"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "4",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "128a",
+ "name": "Delete nonexistent mq Qdisc",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "03a9",
+ "name": "Delete mq Qdisc twice",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: mq",
+ "$TC qdisc del dev $ETH root handle 1: mq"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "be0f",
+ "name": "Add mq Qdisc to single-queue device",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
index cd4a27ee1466..ea04f04c173e 100644
--- a/tools/testing/selftests/tc-testing/tdc_config.py
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -17,6 +17,7 @@ NAMES = {
'DEV1': 'v0p1',
'DEV2': '',
'DUMMY': 'dummy1',
+ 'ETH': 'eth0',
'BATCH_FILE': './batch.txt',
'BATCH_DIR': 'tmp',
# Length of time in seconds to wait before terminating a command
diff --git a/tools/testing/selftests/timers/rtcpie.c b/tools/testing/selftests/timers/rtcpie.c
index 47b5bad1b393..4ef2184f1558 100644
--- a/tools/testing/selftests/timers/rtcpie.c
+++ b/tools/testing/selftests/timers/rtcpie.c
@@ -18,6 +18,8 @@
#include <stdlib.h>
#include <errno.h>
+#include "../kselftest.h"
+
/*
* This expects the new RTC class driver framework, working with
* clocks that will often not be clones of what the PC-AT had.
@@ -35,8 +37,14 @@ int main(int argc, char **argv)
switch (argc) {
case 2:
rtc = argv[1];
- /* FALLTHROUGH */
+ break;
case 1:
+ fd = open(default_rtc, O_RDONLY);
+ if (fd == -1) {
+ printf("Default RTC %s does not exist. Test Skipped!\n", default_rtc);
+ exit(KSFT_SKIP);
+ }
+ close(fd);
break;
default:
fprintf(stderr, "usage: rtctest [rtcdev] [d]\n");
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 1f651e85ed60..b02eac613fdd 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -12,6 +12,9 @@ mremap_test
on-fault-limit
transhuge-stress
protection_keys
+protection_keys_32
+protection_keys_64
+madv_populate
userfaultfd
mlock-intersect-test
mlock-random-test
@@ -21,5 +24,7 @@ va_128TBswitch
map_fixed_noreplace
write_to_hugetlbfs
hmm-tests
+memfd_secret
local_config.*
split_huge_page_test
+ksm_tests
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 73e1cc96d7c2..d9605bd10f2d 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -31,9 +31,11 @@ TEST_GEN_FILES += hmm-tests
TEST_GEN_FILES += hugepage-mmap
TEST_GEN_FILES += hugepage-shm
TEST_GEN_FILES += khugepaged
+TEST_GEN_FILES += madv_populate
TEST_GEN_FILES += map_fixed_noreplace
TEST_GEN_FILES += map_hugetlb
TEST_GEN_FILES += map_populate
+TEST_GEN_FILES += memfd_secret
TEST_GEN_FILES += mlock-random-test
TEST_GEN_FILES += mlock2-tests
TEST_GEN_FILES += mremap_dontunmap
@@ -43,6 +45,7 @@ TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += userfaultfd
TEST_GEN_FILES += split_huge_page_test
+TEST_GEN_FILES += ksm_tests
ifeq ($(MACHINE),x86_64)
CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
@@ -100,7 +103,7 @@ $(1) $(1)_64: $(OUTPUT)/$(1)_64
endef
ifeq ($(CAN_BUILD_I386),1)
-$(BINARIES_32): CFLAGS += -m32
+$(BINARIES_32): CFLAGS += -m32 -mxsave
$(BINARIES_32): LDLIBS += -lrt -ldl -lm
$(BINARIES_32): $(OUTPUT)/%_32: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
@@ -108,7 +111,7 @@ $(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
endif
ifeq ($(CAN_BUILD_X86_64),1)
-$(BINARIES_64): CFLAGS += -m64
+$(BINARIES_64): CFLAGS += -m64 -mxsave
$(BINARIES_64): LDLIBS += -lrt -ldl
$(BINARIES_64): $(OUTPUT)/%_64: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
@@ -134,7 +137,7 @@ warn_32bit_failure:
endif
endif
-$(OUTPUT)/mlock-random-test: LDLIBS += -lcap
+$(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
$(OUTPUT)/gup_test: ../../../../mm/gup_test.h
@@ -143,6 +146,8 @@ $(OUTPUT)/hmm-tests: local_config.h
# HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
$(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
+$(OUTPUT)/ksm_tests: LDLIBS += -lnuma
+
local_config.mk local_config.h: check_config.sh
/bin/sh ./check_config.sh $(CC)
diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
index 18d33684faad..fe8fcfb334e0 100644
--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
@@ -1,11 +1,14 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
set -e
if [[ $(id -u) -ne 0 ]]; then
echo "This test must be run as root. Skipping..."
- exit 0
+ exit $ksft_skip
fi
fault_limit_file=limit_in_bytes
diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c
index 1e662d59c502..fe043f67798b 100644
--- a/tools/testing/selftests/vm/gup_test.c
+++ b/tools/testing/selftests/vm/gup_test.c
@@ -6,6 +6,8 @@
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
+#include <pthread.h>
+#include <assert.h>
#include "../../../../mm/gup_test.h"
#define MB (1UL << 20)
@@ -15,6 +17,12 @@
#define FOLL_WRITE 0x01 /* check pte is writable */
#define FOLL_TOUCH 0x02 /* mark page accessed */
+static unsigned long cmd = GUP_FAST_BENCHMARK;
+static int gup_fd, repeats = 1;
+static unsigned long size = 128 * MB;
+/* Serialize prints */
+static pthread_mutex_t print_mutex = PTHREAD_MUTEX_INITIALIZER;
+
static char *cmd_to_str(unsigned long cmd)
{
switch (cmd) {
@@ -34,17 +42,55 @@ static char *cmd_to_str(unsigned long cmd)
return "Unknown command";
}
+void *gup_thread(void *data)
+{
+ struct gup_test gup = *(struct gup_test *)data;
+ int i;
+
+ /* Only report timing information on the *_BENCHMARK commands: */
+ if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
+ (cmd == PIN_LONGTERM_BENCHMARK)) {
+ for (i = 0; i < repeats; i++) {
+ gup.size = size;
+ if (ioctl(gup_fd, cmd, &gup))
+ perror("ioctl"), exit(1);
+
+ pthread_mutex_lock(&print_mutex);
+ printf("%s: Time: get:%lld put:%lld us",
+ cmd_to_str(cmd), gup.get_delta_usec,
+ gup.put_delta_usec);
+ if (gup.size != size)
+ printf(", truncated (size: %lld)", gup.size);
+ printf("\n");
+ pthread_mutex_unlock(&print_mutex);
+ }
+ } else {
+ gup.size = size;
+ if (ioctl(gup_fd, cmd, &gup)) {
+ perror("ioctl");
+ exit(1);
+ }
+
+ pthread_mutex_lock(&print_mutex);
+ printf("%s: done\n", cmd_to_str(cmd));
+ if (gup.size != size)
+ printf("Truncated (size: %lld)\n", gup.size);
+ pthread_mutex_unlock(&print_mutex);
+ }
+
+ return NULL;
+}
+
int main(int argc, char **argv)
{
struct gup_test gup = { 0 };
- unsigned long size = 128 * MB;
- int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 1;
- unsigned long cmd = GUP_FAST_BENCHMARK;
+ int filed, i, opt, nr_pages = 1, thp = -1, write = 1, nthreads = 1, ret;
int flags = MAP_PRIVATE, touch = 0;
char *file = "/dev/zero";
+ pthread_t *tid;
char *p;
- while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwWSHpz")) != -1) {
+ while ((opt = getopt(argc, argv, "m:r:n:F:f:abcj:tTLUuwWSHpz")) != -1) {
switch (opt) {
case 'a':
cmd = PIN_FAST_BENCHMARK;
@@ -74,6 +120,9 @@ int main(int argc, char **argv)
/* strtol, so you can pass flags in hex form */
gup.gup_flags = strtol(optarg, 0, 0);
break;
+ case 'j':
+ nthreads = atoi(optarg);
+ break;
case 'm':
size = atoi(optarg) * MB;
break;
@@ -154,8 +203,8 @@ int main(int argc, char **argv)
if (write)
gup.gup_flags |= FOLL_WRITE;
- fd = open("/sys/kernel/debug/gup_test", O_RDWR);
- if (fd == -1) {
+ gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ if (gup_fd == -1) {
perror("open");
exit(1);
}
@@ -185,32 +234,17 @@ int main(int argc, char **argv)
p[0] = 0;
}
- /* Only report timing information on the *_BENCHMARK commands: */
- if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
- (cmd == PIN_LONGTERM_BENCHMARK)) {
- for (i = 0; i < repeats; i++) {
- gup.size = size;
- if (ioctl(fd, cmd, &gup))
- perror("ioctl"), exit(1);
-
- printf("%s: Time: get:%lld put:%lld us",
- cmd_to_str(cmd), gup.get_delta_usec,
- gup.put_delta_usec);
- if (gup.size != size)
- printf(", truncated (size: %lld)", gup.size);
- printf("\n");
- }
- } else {
- gup.size = size;
- if (ioctl(fd, cmd, &gup)) {
- perror("ioctl");
- exit(1);
- }
-
- printf("%s: done\n", cmd_to_str(cmd));
- if (gup.size != size)
- printf("Truncated (size: %lld)\n", gup.size);
+ tid = malloc(sizeof(pthread_t) * nthreads);
+ assert(tid);
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_create(&tid[i], NULL, gup_thread, &gup);
+ assert(ret == 0);
+ }
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(tid[i], NULL);
+ assert(ret == 0);
}
+ free(tid);
return 0;
}
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 5d1ac691b9f4..864f126ffd78 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -1485,4 +1485,162 @@ TEST_F(hmm2, double_map)
hmm_buffer_free(buffer);
}
+/*
+ * Basic check of exclusive faulting.
+ */
+TEST_F(hmm, exclusive)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Map memory exclusively for device access. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i]++, i);
+
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i+1);
+
+ /* Check atomic access revoked */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_CHECK_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+
+ hmm_buffer_free(buffer);
+}
+
+TEST_F(hmm, exclusive_mprotect)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Map memory exclusively for device access. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, -EPERM);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Check copy-on-write works.
+ */
+TEST_F(hmm, exclusive_cow)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Map memory exclusively for device access. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ fork();
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i]++, i);
+
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i+1);
+
+ hmm_buffer_free(buffer);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
index d11d1febccc3..4a9a3afe9fd4 100644
--- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
@@ -1,11 +1,14 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
set -e
if [[ $(id -u) -ne 0 ]]; then
echo "This test must be run as root. Skipping..."
- exit 0
+ exit $ksft_skip
fi
usage_file=usage_in_bytes
diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c
index 8b75821302a7..155120b67a16 100644
--- a/tools/testing/selftests/vm/khugepaged.c
+++ b/tools/testing/selftests/vm/khugepaged.c
@@ -86,7 +86,6 @@ struct settings {
enum thp_enabled thp_enabled;
enum thp_defrag thp_defrag;
enum shmem_enabled shmem_enabled;
- bool debug_cow;
bool use_zero_page;
struct khugepaged_settings khugepaged;
};
@@ -95,7 +94,6 @@ static struct settings default_settings = {
.thp_enabled = THP_MADVISE,
.thp_defrag = THP_DEFRAG_ALWAYS,
.shmem_enabled = SHMEM_NEVER,
- .debug_cow = 0,
.use_zero_page = 0,
.khugepaged = {
.defrag = 1,
@@ -268,7 +266,6 @@ static void write_settings(struct settings *settings)
write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
write_string("shmem_enabled",
shmem_enabled_strings[settings->shmem_enabled]);
- write_num("debug_cow", settings->debug_cow);
write_num("use_zero_page", settings->use_zero_page);
write_num("khugepaged/defrag", khugepaged->defrag);
@@ -304,7 +301,6 @@ static void save_settings(void)
.thp_defrag = read_string("defrag", thp_defrag_strings),
.shmem_enabled =
read_string("shmem_enabled", shmem_enabled_strings),
- .debug_cow = read_num("debug_cow"),
.use_zero_page = read_num("use_zero_page"),
};
saved_settings.khugepaged = (struct khugepaged_settings) {
diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c
new file mode 100644
index 000000000000..b61dcdb44c5b
--- /dev/null
+++ b/tools/testing/selftests/vm/ksm_tests.c
@@ -0,0 +1,662 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/mman.h>
+#include <stdbool.h>
+#include <time.h>
+#include <string.h>
+#include <numa.h>
+
+#include "../kselftest.h"
+#include "../../../../include/vdso/time64.h"
+
+#define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/"
+#define KSM_FP(s) (KSM_SYSFS_PATH s)
+#define KSM_SCAN_LIMIT_SEC_DEFAULT 120
+#define KSM_PAGE_COUNT_DEFAULT 10l
+#define KSM_PROT_STR_DEFAULT "rw"
+#define KSM_USE_ZERO_PAGES_DEFAULT false
+#define KSM_MERGE_ACROSS_NODES_DEFAULT true
+#define MB (1ul << 20)
+
+struct ksm_sysfs {
+ unsigned long max_page_sharing;
+ unsigned long merge_across_nodes;
+ unsigned long pages_to_scan;
+ unsigned long run;
+ unsigned long sleep_millisecs;
+ unsigned long stable_node_chains_prune_millisecs;
+ unsigned long use_zero_pages;
+};
+
+enum ksm_test_name {
+ CHECK_KSM_MERGE,
+ CHECK_KSM_UNMERGE,
+ CHECK_KSM_ZERO_PAGE_MERGE,
+ CHECK_KSM_NUMA_MERGE,
+ KSM_MERGE_TIME,
+ KSM_COW_TIME
+};
+
+static int ksm_write_sysfs(const char *file_path, unsigned long val)
+{
+ FILE *f = fopen(file_path, "w");
+
+ if (!f) {
+ fprintf(stderr, "f %s\n", file_path);
+ perror("fopen");
+ return 1;
+ }
+ if (fprintf(f, "%lu", val) < 0) {
+ perror("fprintf");
+ return 1;
+ }
+ fclose(f);
+
+ return 0;
+}
+
+static int ksm_read_sysfs(const char *file_path, unsigned long *val)
+{
+ FILE *f = fopen(file_path, "r");
+
+ if (!f) {
+ fprintf(stderr, "f %s\n", file_path);
+ perror("fopen");
+ return 1;
+ }
+ if (fscanf(f, "%lu", val) != 1) {
+ perror("fscanf");
+ return 1;
+ }
+ fclose(f);
+
+ return 0;
+}
+
+static int str_to_prot(char *prot_str)
+{
+ int prot = 0;
+
+ if ((strchr(prot_str, 'r')) != NULL)
+ prot |= PROT_READ;
+ if ((strchr(prot_str, 'w')) != NULL)
+ prot |= PROT_WRITE;
+ if ((strchr(prot_str, 'x')) != NULL)
+ prot |= PROT_EXEC;
+
+ return prot;
+}
+
+static void print_help(void)
+{
+ printf("usage: ksm_tests [-h] <test type> [-a prot] [-p page_count] [-l timeout]\n"
+ "[-z use_zero_pages] [-m merge_across_nodes] [-s size]\n");
+
+ printf("Supported <test type>:\n"
+ " -M (page merging)\n"
+ " -Z (zero pages merging)\n"
+ " -N (merging of pages in different NUMA nodes)\n"
+ " -U (page unmerging)\n"
+ " -P evaluate merging time and speed.\n"
+ " For this test, the size of duplicated memory area (in MiB)\n"
+ " must be provided using -s option\n"
+ " -C evaluate the time required to break COW of merged pages.\n\n");
+
+ printf(" -a: specify the access protections of pages.\n"
+ " <prot> must be of the form [rwx].\n"
+ " Default: %s\n", KSM_PROT_STR_DEFAULT);
+ printf(" -p: specify the number of pages to test.\n"
+ " Default: %ld\n", KSM_PAGE_COUNT_DEFAULT);
+ printf(" -l: limit the maximum running time (in seconds) for a test.\n"
+ " Default: %d seconds\n", KSM_SCAN_LIMIT_SEC_DEFAULT);
+ printf(" -z: change use_zero_pages tunable\n"
+ " Default: %d\n", KSM_USE_ZERO_PAGES_DEFAULT);
+ printf(" -m: change merge_across_nodes tunable\n"
+ " Default: %d\n", KSM_MERGE_ACROSS_NODES_DEFAULT);
+ printf(" -s: the size of duplicated memory area (in MiB)\n");
+
+ exit(0);
+}
+
+static void *allocate_memory(void *ptr, int prot, int mapping, char data, size_t map_size)
+{
+ void *map_ptr = mmap(ptr, map_size, PROT_WRITE, mapping, -1, 0);
+
+ if (!map_ptr) {
+ perror("mmap");
+ return NULL;
+ }
+ memset(map_ptr, data, map_size);
+ if (mprotect(map_ptr, map_size, prot)) {
+ perror("mprotect");
+ munmap(map_ptr, map_size);
+ return NULL;
+ }
+
+ return map_ptr;
+}
+
+static int ksm_do_scan(int scan_count, struct timespec start_time, int timeout)
+{
+ struct timespec cur_time;
+ unsigned long cur_scan, init_scan;
+
+ if (ksm_read_sysfs(KSM_FP("full_scans"), &init_scan))
+ return 1;
+ cur_scan = init_scan;
+
+ while (cur_scan < init_scan + scan_count) {
+ if (ksm_read_sysfs(KSM_FP("full_scans"), &cur_scan))
+ return 1;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time)) {
+ perror("clock_gettime");
+ return 1;
+ }
+ if ((cur_time.tv_sec - start_time.tv_sec) > timeout) {
+ printf("Scan time limit exceeded\n");
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int ksm_merge_pages(void *addr, size_t size, struct timespec start_time, int timeout)
+{
+ if (madvise(addr, size, MADV_MERGEABLE)) {
+ perror("madvise");
+ return 1;
+ }
+ if (ksm_write_sysfs(KSM_FP("run"), 1))
+ return 1;
+
+ /* Since merging occurs only after 2 scans, make sure to get at least 2 full scans */
+ if (ksm_do_scan(2, start_time, timeout))
+ return 1;
+
+ return 0;
+}
+
+static bool assert_ksm_pages_count(long dupl_page_count)
+{
+ unsigned long max_page_sharing, pages_sharing, pages_shared;
+
+ if (ksm_read_sysfs(KSM_FP("pages_shared"), &pages_shared) ||
+ ksm_read_sysfs(KSM_FP("pages_sharing"), &pages_sharing) ||
+ ksm_read_sysfs(KSM_FP("max_page_sharing"), &max_page_sharing))
+ return false;
+
+ /*
+ * Since there must be at least 2 pages for merging and 1 page can be
+ * shared with the limited number of pages (max_page_sharing), sometimes
+ * there are 'leftover' pages that cannot be merged. For example, if there
+ * are 11 pages and max_page_sharing = 10, then only 10 pages will be
+ * merged and the 11th page won't be affected. As a result, when the number
+ * of duplicate pages is divided by max_page_sharing and the remainder is 1,
+ * pages_shared and pages_sharing values will be equal between dupl_page_count
+ * and dupl_page_count - 1.
+ */
+ if (dupl_page_count % max_page_sharing == 1 || dupl_page_count % max_page_sharing == 0) {
+ if (pages_shared == dupl_page_count / max_page_sharing &&
+ pages_sharing == pages_shared * (max_page_sharing - 1))
+ return true;
+ } else {
+ if (pages_shared == (dupl_page_count / max_page_sharing + 1) &&
+ pages_sharing == dupl_page_count - pages_shared)
+ return true;
+ }
+
+ return false;
+}
+
+static int ksm_save_def(struct ksm_sysfs *ksm_sysfs)
+{
+ if (ksm_read_sysfs(KSM_FP("max_page_sharing"), &ksm_sysfs->max_page_sharing) ||
+ ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) ||
+ ksm_read_sysfs(KSM_FP("sleep_millisecs"), &ksm_sysfs->sleep_millisecs) ||
+ ksm_read_sysfs(KSM_FP("pages_to_scan"), &ksm_sysfs->pages_to_scan) ||
+ ksm_read_sysfs(KSM_FP("run"), &ksm_sysfs->run) ||
+ ksm_read_sysfs(KSM_FP("stable_node_chains_prune_millisecs"),
+ &ksm_sysfs->stable_node_chains_prune_millisecs) ||
+ ksm_read_sysfs(KSM_FP("use_zero_pages"), &ksm_sysfs->use_zero_pages))
+ return 1;
+
+ return 0;
+}
+
+static int ksm_restore(struct ksm_sysfs *ksm_sysfs)
+{
+ if (ksm_write_sysfs(KSM_FP("max_page_sharing"), ksm_sysfs->max_page_sharing) ||
+ ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) ||
+ ksm_write_sysfs(KSM_FP("pages_to_scan"), ksm_sysfs->pages_to_scan) ||
+ ksm_write_sysfs(KSM_FP("run"), ksm_sysfs->run) ||
+ ksm_write_sysfs(KSM_FP("sleep_millisecs"), ksm_sysfs->sleep_millisecs) ||
+ ksm_write_sysfs(KSM_FP("stable_node_chains_prune_millisecs"),
+ ksm_sysfs->stable_node_chains_prune_millisecs) ||
+ ksm_write_sysfs(KSM_FP("use_zero_pages"), ksm_sysfs->use_zero_pages))
+ return 1;
+
+ return 0;
+}
+
+static int check_ksm_merge(int mapping, int prot, long page_count, int timeout, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ /* fill pages with the same data and merge them */
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ /* verify that the right number of pages are merged */
+ if (assert_ksm_pages_count(page_count)) {
+ printf("OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_PASS;
+ }
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+static int check_ksm_unmerge(int mapping, int prot, int timeout, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time;
+ int page_count = 2;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ /* fill pages with the same data and merge them */
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ /* change 1 byte in each of the 2 pages -- KSM must automatically unmerge them */
+ memset(map_ptr, '-', 1);
+ memset(map_ptr + page_size, '+', 1);
+
+ /* get at least 1 scan, so KSM can detect that the pages were modified */
+ if (ksm_do_scan(1, start_time, timeout))
+ goto err_out;
+
+ /* check that unmerging was successful and 0 pages are currently merged */
+ if (assert_ksm_pages_count(0)) {
+ printf("OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_PASS;
+ }
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+static int check_ksm_zero_page_merge(int mapping, int prot, long page_count, int timeout,
+ bool use_zero_pages, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ if (ksm_write_sysfs(KSM_FP("use_zero_pages"), use_zero_pages))
+ return KSFT_FAIL;
+
+ /* fill pages with zero and try to merge them */
+ map_ptr = allocate_memory(NULL, prot, mapping, 0, page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ /*
+ * verify that the right number of pages are merged:
+ * 1) if use_zero_pages is set to 1, empty pages are merged
+ * with the kernel zero page instead of with each other;
+ * 2) if use_zero_pages is set to 0, empty pages are not treated specially
+ * and merged as usual.
+ */
+ if (use_zero_pages && !assert_ksm_pages_count(0))
+ goto err_out;
+ else if (!use_zero_pages && !assert_ksm_pages_count(page_count))
+ goto err_out;
+
+ printf("OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_across_nodes,
+ size_t page_size)
+{
+ void *numa1_map_ptr, *numa2_map_ptr;
+ struct timespec start_time;
+ int page_count = 2;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ if (numa_available() < 0) {
+ perror("NUMA support not enabled");
+ return KSFT_SKIP;
+ }
+ if (numa_max_node() < 1) {
+ printf("At least 2 NUMA nodes must be available\n");
+ return KSFT_SKIP;
+ }
+ if (ksm_write_sysfs(KSM_FP("merge_across_nodes"), merge_across_nodes))
+ return KSFT_FAIL;
+
+ /* allocate 2 pages in 2 different NUMA nodes and fill them with the same data */
+ numa1_map_ptr = numa_alloc_onnode(page_size, 0);
+ numa2_map_ptr = numa_alloc_onnode(page_size, 1);
+ if (!numa1_map_ptr || !numa2_map_ptr) {
+ perror("numa_alloc_onnode");
+ return KSFT_FAIL;
+ }
+
+ memset(numa1_map_ptr, '*', page_size);
+ memset(numa2_map_ptr, '*', page_size);
+
+ /* try to merge the pages */
+ if (ksm_merge_pages(numa1_map_ptr, page_size, start_time, timeout) ||
+ ksm_merge_pages(numa2_map_ptr, page_size, start_time, timeout))
+ goto err_out;
+
+ /*
+ * verify that the right number of pages are merged:
+ * 1) if merge_across_nodes was enabled, 2 duplicate pages will be merged;
+ * 2) if merge_across_nodes = 0, there must be 0 merged pages, since there is
+ * only 1 unique page in each node and they can't be shared.
+ */
+ if (merge_across_nodes && !assert_ksm_pages_count(page_count))
+ goto err_out;
+ else if (!merge_across_nodes && !assert_ksm_pages_count(0))
+ goto err_out;
+
+ numa_free(numa1_map_ptr, page_size);
+ numa_free(numa2_map_ptr, page_size);
+ printf("OK\n");
+ return KSFT_PASS;
+
+err_out:
+ numa_free(numa1_map_ptr, page_size);
+ numa_free(numa2_map_ptr, page_size);
+ printf("Not OK\n");
+ return KSFT_FAIL;
+}
+
+static int ksm_merge_time(int mapping, int prot, int timeout, size_t map_size)
+{
+ void *map_ptr;
+ struct timespec start_time, end_time;
+ unsigned long scan_time_ns;
+
+ map_size *= MB;
+
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', map_size);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ if (ksm_merge_pages(map_ptr, map_size, start_time, timeout))
+ goto err_out;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+
+ scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Total size: %lu MiB\n", map_size / MB);
+ printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
+ scan_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n", (map_size / MB) /
+ ((double)scan_time_ns / NSEC_PER_SEC));
+
+ munmap(map_ptr, map_size);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, map_size);
+ return KSFT_FAIL;
+}
+
+static int ksm_cow_time(int mapping, int prot, int timeout, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time, end_time;
+ unsigned long cow_time_ns;
+
+ /* page_count must be less than 2*page_size */
+ size_t page_count = 4000;
+
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+ for (size_t i = 0; i < page_count - 1; i = i + 2)
+ memset(map_ptr + page_size * i, '-', 1);
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Total size: %lu MiB\n\n", (page_size * page_count) / MB);
+ printf("Not merged pages:\n");
+ printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
+ cow_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n\n", ((page_size * (page_count / 2)) / MB) /
+ ((double)cow_time_ns / NSEC_PER_SEC));
+
+ /* Create 2000 pairs of duplicate pages */
+ for (size_t i = 0; i < page_count - 1; i = i + 2) {
+ memset(map_ptr + page_size * i, '+', i / 2 + 1);
+ memset(map_ptr + page_size * (i + 1), '+', i / 2 + 1);
+ }
+ if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ for (size_t i = 0; i < page_count - 1; i = i + 2)
+ memset(map_ptr + page_size * i, '-', 1);
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+
+ cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Merged pages:\n");
+ printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
+ cow_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n", ((page_size * (page_count / 2)) / MB) /
+ ((double)cow_time_ns / NSEC_PER_SEC));
+
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret, opt;
+ int prot = 0;
+ int ksm_scan_limit_sec = KSM_SCAN_LIMIT_SEC_DEFAULT;
+ long page_count = KSM_PAGE_COUNT_DEFAULT;
+ size_t page_size = sysconf(_SC_PAGESIZE);
+ struct ksm_sysfs ksm_sysfs_old;
+ int test_name = CHECK_KSM_MERGE;
+ bool use_zero_pages = KSM_USE_ZERO_PAGES_DEFAULT;
+ bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT;
+ long size_MB = 0;
+
+ while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPC")) != -1) {
+ switch (opt) {
+ case 'a':
+ prot = str_to_prot(optarg);
+ break;
+ case 'p':
+ page_count = atol(optarg);
+ if (page_count <= 0) {
+ printf("The number of pages must be greater than 0\n");
+ return KSFT_FAIL;
+ }
+ break;
+ case 'l':
+ ksm_scan_limit_sec = atoi(optarg);
+ if (ksm_scan_limit_sec <= 0) {
+ printf("Timeout value must be greater than 0\n");
+ return KSFT_FAIL;
+ }
+ break;
+ case 'h':
+ print_help();
+ break;
+ case 'z':
+ if (strcmp(optarg, "0") == 0)
+ use_zero_pages = 0;
+ else
+ use_zero_pages = 1;
+ break;
+ case 'm':
+ if (strcmp(optarg, "0") == 0)
+ merge_across_nodes = 0;
+ else
+ merge_across_nodes = 1;
+ break;
+ case 's':
+ size_MB = atoi(optarg);
+ if (size_MB <= 0) {
+ printf("Size must be greater than 0\n");
+ return KSFT_FAIL;
+ }
+ case 'M':
+ break;
+ case 'U':
+ test_name = CHECK_KSM_UNMERGE;
+ break;
+ case 'Z':
+ test_name = CHECK_KSM_ZERO_PAGE_MERGE;
+ break;
+ case 'N':
+ test_name = CHECK_KSM_NUMA_MERGE;
+ break;
+ case 'P':
+ test_name = KSM_MERGE_TIME;
+ break;
+ case 'C':
+ test_name = KSM_COW_TIME;
+ break;
+ default:
+ return KSFT_FAIL;
+ }
+ }
+
+ if (prot == 0)
+ prot = str_to_prot(KSM_PROT_STR_DEFAULT);
+
+ if (access(KSM_SYSFS_PATH, F_OK)) {
+ printf("Config KSM not enabled\n");
+ return KSFT_SKIP;
+ }
+
+ if (ksm_save_def(&ksm_sysfs_old)) {
+ printf("Cannot save default tunables\n");
+ return KSFT_FAIL;
+ }
+
+ if (ksm_write_sysfs(KSM_FP("run"), 2) ||
+ ksm_write_sysfs(KSM_FP("sleep_millisecs"), 0) ||
+ ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) ||
+ ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count))
+ return KSFT_FAIL;
+
+ switch (test_name) {
+ case CHECK_KSM_MERGE:
+ ret = check_ksm_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count,
+ ksm_scan_limit_sec, page_size);
+ break;
+ case CHECK_KSM_UNMERGE:
+ ret = check_ksm_unmerge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
+ page_size);
+ break;
+ case CHECK_KSM_ZERO_PAGE_MERGE:
+ ret = check_ksm_zero_page_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count,
+ ksm_scan_limit_sec, use_zero_pages, page_size);
+ break;
+ case CHECK_KSM_NUMA_MERGE:
+ ret = check_ksm_numa_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
+ merge_across_nodes, page_size);
+ break;
+ case KSM_MERGE_TIME:
+ if (size_MB == 0) {
+ printf("Option '-s' is required.\n");
+ return KSFT_FAIL;
+ }
+ ret = ksm_merge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
+ size_MB);
+ break;
+ case KSM_COW_TIME:
+ ret = ksm_cow_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
+ page_size);
+ break;
+ }
+
+ if (ksm_restore(&ksm_sysfs_old)) {
+ printf("Cannot restore default tunables\n");
+ return KSFT_FAIL;
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c
new file mode 100644
index 000000000000..b959e4ebdad4
--- /dev/null
+++ b/tools/testing/selftests/vm/madv_populate.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
+ *
+ * Copyright 2021, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include "../kselftest.h"
+
+#if defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE)
+
+/*
+ * For now, we're using 2 MiB of private anonymous memory for all tests.
+ */
+#define SIZE (2 * 1024 * 1024)
+
+static size_t pagesize;
+
+static uint64_t pagemap_get_entry(int fd, char *start)
+{
+ const unsigned long pfn = (unsigned long)start / pagesize;
+ uint64_t entry;
+ int ret;
+
+ ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry));
+ if (ret != sizeof(entry))
+ ksft_exit_fail_msg("reading pagemap failed\n");
+ return entry;
+}
+
+static bool pagemap_is_populated(int fd, char *start)
+{
+ uint64_t entry = pagemap_get_entry(fd, start);
+
+ /* Present or swapped. */
+ return entry & 0xc000000000000000ull;
+}
+
+static bool pagemap_is_softdirty(int fd, char *start)
+{
+ uint64_t entry = pagemap_get_entry(fd, start);
+
+ return entry & 0x0080000000000000ull;
+}
+
+static void sense_support(void)
+{
+ char *addr;
+ int ret;
+
+ addr = mmap(0, pagesize, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (!addr)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ ret = madvise(addr, pagesize, MADV_POPULATE_READ);
+ if (ret)
+ ksft_exit_skip("MADV_POPULATE_READ is not available\n");
+
+ ret = madvise(addr, pagesize, MADV_POPULATE_WRITE);
+ if (ret)
+ ksft_exit_skip("MADV_POPULATE_WRITE is not available\n");
+
+ munmap(addr, pagesize);
+}
+
+static void test_prot_read(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(!ret, "MADV_POPULATE_READ with PROT_READ\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == EINVAL,
+ "MADV_POPULATE_WRITE with PROT_READ\n");
+
+ munmap(addr, SIZE);
+}
+
+static void test_prot_write(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == EINVAL,
+ "MADV_POPULATE_READ with PROT_WRITE\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(!ret, "MADV_POPULATE_WRITE with PROT_WRITE\n");
+
+ munmap(addr, SIZE);
+}
+
+static void test_holes(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+ ret = munmap(addr + pagesize, pagesize);
+ if (ret)
+ ksft_exit_fail_msg("munmap failed\n");
+
+ /* Hole in the middle */
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_READ with holes in the middle\n");
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_WRITE with holes in the middle\n");
+
+ /* Hole at end */
+ ret = madvise(addr, 2 * pagesize, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_READ with holes at the end\n");
+ ret = madvise(addr, 2 * pagesize, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_WRITE with holes at the end\n");
+
+ /* Hole at beginning */
+ ret = madvise(addr + pagesize, pagesize, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_READ with holes at the beginning\n");
+ ret = madvise(addr + pagesize, pagesize, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_WRITE with holes at the beginning\n");
+
+ munmap(addr, SIZE);
+}
+
+static bool range_is_populated(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (!pagemap_is_populated(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static bool range_is_not_populated(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (pagemap_is_populated(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static void test_populate_read(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+ ksft_test_result(range_is_not_populated(addr, SIZE),
+ "range initially not populated\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(!ret, "MADV_POPULATE_READ\n");
+ ksft_test_result(range_is_populated(addr, SIZE),
+ "range is populated\n");
+
+ munmap(addr, SIZE);
+}
+
+static void test_populate_write(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+ ksft_test_result(range_is_not_populated(addr, SIZE),
+ "range initially not populated\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(!ret, "MADV_POPULATE_WRITE\n");
+ ksft_test_result(range_is_populated(addr, SIZE),
+ "range is populated\n");
+
+ munmap(addr, SIZE);
+}
+
+static bool range_is_softdirty(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (!pagemap_is_softdirty(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static bool range_is_not_softdirty(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (pagemap_is_softdirty(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static void clear_softdirty(void)
+{
+ int fd = open("/proc/self/clear_refs", O_WRONLY);
+ const char *ctrl = "4";
+ int ret;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening clear_refs failed\n");
+ ret = write(fd, ctrl, strlen(ctrl));
+ if (ret != strlen(ctrl))
+ ksft_exit_fail_msg("writing clear_refs failed\n");
+ close(fd);
+}
+
+static void test_softdirty(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /* Clear any softdirty bits. */
+ clear_softdirty();
+ ksft_test_result(range_is_not_softdirty(addr, SIZE),
+ "range is not softdirty\n");
+
+ /* Populating READ should set softdirty. */
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(!ret, "MADV_POPULATE_READ\n");
+ ksft_test_result(range_is_not_softdirty(addr, SIZE),
+ "range is not softdirty\n");
+
+ /* Populating WRITE should set softdirty. */
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(!ret, "MADV_POPULATE_WRITE\n");
+ ksft_test_result(range_is_softdirty(addr, SIZE),
+ "range is softdirty\n");
+
+ munmap(addr, SIZE);
+}
+
+int main(int argc, char **argv)
+{
+ int err;
+
+ pagesize = getpagesize();
+
+ ksft_print_header();
+ ksft_set_plan(21);
+
+ sense_support();
+ test_prot_read();
+ test_prot_write();
+ test_holes();
+ test_populate_read();
+ test_populate_write();
+ test_softdirty();
+
+ err = ksft_get_fail_cnt();
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+ return ksft_exit_pass();
+}
+
+#else /* defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) */
+
+#warning "missing MADV_POPULATE_READ or MADV_POPULATE_WRITE definition"
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_exit_skip("MADV_POPULATE_READ or MADV_POPULATE_WRITE not defined\n");
+}
+
+#endif /* defined(MADV_POPULATE_READ) && defined(MADV_POPULATE_WRITE) */
diff --git a/tools/testing/selftests/vm/memfd_secret.c b/tools/testing/selftests/vm/memfd_secret.c
new file mode 100644
index 000000000000..93e7e7ffed33
--- /dev/null
+++ b/tools/testing/selftests/vm/memfd_secret.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corporation, 2021
+ *
+ * Author: Mike Rapoport <rppt@linux.ibm.com>
+ */
+
+#define _GNU_SOURCE
+#include <sys/uio.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/resource.h>
+#include <sys/capability.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+
+#include "../kselftest.h"
+
+#define fail(fmt, ...) ksft_test_result_fail(fmt, ##__VA_ARGS__)
+#define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__)
+#define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__)
+
+#ifdef __NR_memfd_secret
+
+#define PATTERN 0x55
+
+static const int prot = PROT_READ | PROT_WRITE;
+static const int mode = MAP_SHARED;
+
+static unsigned long page_size;
+static unsigned long mlock_limit_cur;
+static unsigned long mlock_limit_max;
+
+static int memfd_secret(unsigned int flags)
+{
+ return syscall(__NR_memfd_secret, flags);
+}
+
+static void test_file_apis(int fd)
+{
+ char buf[64];
+
+ if ((read(fd, buf, sizeof(buf)) >= 0) ||
+ (write(fd, buf, sizeof(buf)) >= 0) ||
+ (pread(fd, buf, sizeof(buf), 0) >= 0) ||
+ (pwrite(fd, buf, sizeof(buf), 0) >= 0))
+ fail("unexpected file IO\n");
+ else
+ pass("file IO is blocked as expected\n");
+}
+
+static void test_mlock_limit(int fd)
+{
+ size_t len;
+ char *mem;
+
+ len = mlock_limit_cur;
+ mem = mmap(NULL, len, prot, mode, fd, 0);
+ if (mem == MAP_FAILED) {
+ fail("unable to mmap secret memory\n");
+ return;
+ }
+ munmap(mem, len);
+
+ len = mlock_limit_max * 2;
+ mem = mmap(NULL, len, prot, mode, fd, 0);
+ if (mem != MAP_FAILED) {
+ fail("unexpected mlock limit violation\n");
+ munmap(mem, len);
+ return;
+ }
+
+ pass("mlock limit is respected\n");
+}
+
+static void try_process_vm_read(int fd, int pipefd[2])
+{
+ struct iovec liov, riov;
+ char buf[64];
+ char *mem;
+
+ if (read(pipefd[0], &mem, sizeof(mem)) < 0) {
+ fail("pipe write: %s\n", strerror(errno));
+ exit(KSFT_FAIL);
+ }
+
+ liov.iov_len = riov.iov_len = sizeof(buf);
+ liov.iov_base = buf;
+ riov.iov_base = mem;
+
+ if (process_vm_readv(getppid(), &liov, 1, &riov, 1, 0) < 0) {
+ if (errno == ENOSYS)
+ exit(KSFT_SKIP);
+ exit(KSFT_PASS);
+ }
+
+ exit(KSFT_FAIL);
+}
+
+static void try_ptrace(int fd, int pipefd[2])
+{
+ pid_t ppid = getppid();
+ int status;
+ char *mem;
+ long ret;
+
+ if (read(pipefd[0], &mem, sizeof(mem)) < 0) {
+ perror("pipe write");
+ exit(KSFT_FAIL);
+ }
+
+ ret = ptrace(PTRACE_ATTACH, ppid, 0, 0);
+ if (ret) {
+ perror("ptrace_attach");
+ exit(KSFT_FAIL);
+ }
+
+ ret = waitpid(ppid, &status, WUNTRACED);
+ if ((ret != ppid) || !(WIFSTOPPED(status))) {
+ fprintf(stderr, "weird waitppid result %ld stat %x\n",
+ ret, status);
+ exit(KSFT_FAIL);
+ }
+
+ if (ptrace(PTRACE_PEEKDATA, ppid, mem, 0))
+ exit(KSFT_PASS);
+
+ exit(KSFT_FAIL);
+}
+
+static void check_child_status(pid_t pid, const char *name)
+{
+ int status;
+
+ waitpid(pid, &status, 0);
+
+ if (WIFEXITED(status) && WEXITSTATUS(status) == KSFT_SKIP) {
+ skip("%s is not supported\n", name);
+ return;
+ }
+
+ if ((WIFEXITED(status) && WEXITSTATUS(status) == KSFT_PASS) ||
+ WIFSIGNALED(status)) {
+ pass("%s is blocked as expected\n", name);
+ return;
+ }
+
+ fail("%s: unexpected memory access\n", name);
+}
+
+static void test_remote_access(int fd, const char *name,
+ void (*func)(int fd, int pipefd[2]))
+{
+ int pipefd[2];
+ pid_t pid;
+ char *mem;
+
+ if (pipe(pipefd)) {
+ fail("pipe failed: %s\n", strerror(errno));
+ return;
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ fail("fork failed: %s\n", strerror(errno));
+ return;
+ }
+
+ if (pid == 0) {
+ func(fd, pipefd);
+ return;
+ }
+
+ mem = mmap(NULL, page_size, prot, mode, fd, 0);
+ if (mem == MAP_FAILED) {
+ fail("Unable to mmap secret memory\n");
+ return;
+ }
+
+ ftruncate(fd, page_size);
+ memset(mem, PATTERN, page_size);
+
+ if (write(pipefd[1], &mem, sizeof(mem)) < 0) {
+ fail("pipe write: %s\n", strerror(errno));
+ return;
+ }
+
+ check_child_status(pid, name);
+}
+
+static void test_process_vm_read(int fd)
+{
+ test_remote_access(fd, "process_vm_read", try_process_vm_read);
+}
+
+static void test_ptrace(int fd)
+{
+ test_remote_access(fd, "ptrace", try_ptrace);
+}
+
+static int set_cap_limits(rlim_t max)
+{
+ struct rlimit new;
+ cap_t cap = cap_init();
+
+ new.rlim_cur = max;
+ new.rlim_max = max;
+ if (setrlimit(RLIMIT_MEMLOCK, &new)) {
+ perror("setrlimit() returns error");
+ return -1;
+ }
+
+ /* drop capabilities including CAP_IPC_LOCK */
+ if (cap_set_proc(cap)) {
+ perror("cap_set_proc() returns error");
+ return -2;
+ }
+
+ return 0;
+}
+
+static void prepare(void)
+{
+ struct rlimit rlim;
+
+ page_size = sysconf(_SC_PAGE_SIZE);
+ if (!page_size)
+ ksft_exit_fail_msg("Failed to get page size %s\n",
+ strerror(errno));
+
+ if (getrlimit(RLIMIT_MEMLOCK, &rlim))
+ ksft_exit_fail_msg("Unable to detect mlock limit: %s\n",
+ strerror(errno));
+
+ mlock_limit_cur = rlim.rlim_cur;
+ mlock_limit_max = rlim.rlim_max;
+
+ printf("page_size: %ld, mlock.soft: %ld, mlock.hard: %ld\n",
+ page_size, mlock_limit_cur, mlock_limit_max);
+
+ if (page_size > mlock_limit_cur)
+ mlock_limit_cur = page_size;
+ if (page_size > mlock_limit_max)
+ mlock_limit_max = page_size;
+
+ if (set_cap_limits(mlock_limit_max))
+ ksft_exit_fail_msg("Unable to set mlock limit: %s\n",
+ strerror(errno));
+}
+
+#define NUM_TESTS 4
+
+int main(int argc, char *argv[])
+{
+ int fd;
+
+ prepare();
+
+ ksft_print_header();
+ ksft_set_plan(NUM_TESTS);
+
+ fd = memfd_secret(0);
+ if (fd < 0) {
+ if (errno == ENOSYS)
+ ksft_exit_skip("memfd_secret is not supported\n");
+ else
+ ksft_exit_fail_msg("memfd_secret failed: %s\n",
+ strerror(errno));
+ }
+
+ test_mlock_limit(fd);
+ test_file_apis(fd);
+ test_process_vm_read(fd);
+ test_ptrace(fd);
+
+ close(fd);
+
+ ksft_exit(!ksft_get_fail_cnt());
+}
+
+#else /* __NR_memfd_secret */
+
+int main(int argc, char *argv[])
+{
+ printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n");
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_memfd_secret */
diff --git a/tools/testing/selftests/vm/mlock-random-test.c b/tools/testing/selftests/vm/mlock-random-test.c
index ff4d72eb74b9..782ea94dee2f 100644
--- a/tools/testing/selftests/vm/mlock-random-test.c
+++ b/tools/testing/selftests/vm/mlock-random-test.c
@@ -70,7 +70,7 @@ int get_proc_locked_vm_size(void)
}
}
- perror("cann't parse VmLck in /proc/self/status\n");
+ perror("cannot parse VmLck in /proc/self/status\n");
fclose(f);
return -1;
}
diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
index 9c391d016922..0624d1bd71b5 100644
--- a/tools/testing/selftests/vm/mremap_test.c
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -45,14 +45,15 @@ enum {
_4MB = 4ULL << 20,
_1GB = 1ULL << 30,
_2GB = 2ULL << 30,
- PTE = _4KB,
PMD = _2MB,
PUD = _1GB,
};
+#define PTE page_size
+
#define MAKE_TEST(source_align, destination_align, size, \
overlaps, should_fail, test_name) \
-{ \
+(struct test){ \
.name = test_name, \
.config = { \
.src_alignment = source_align, \
@@ -74,9 +75,10 @@ static void *get_source_mapping(struct config c)
retry:
addr += c.src_alignment;
src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
if (src_addr == MAP_FAILED) {
- if (errno == EPERM)
+ if (errno == EPERM || errno == EEXIST)
goto retry;
goto error;
}
@@ -252,12 +254,17 @@ static int parse_args(int argc, char **argv, unsigned int *threshold_mb,
return 0;
}
+#define MAX_TEST 13
+#define MAX_PERF_TEST 3
int main(int argc, char **argv)
{
int failures = 0;
int i, run_perf_tests;
unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
unsigned int pattern_seed;
+ struct test test_cases[MAX_TEST];
+ struct test perf_test_cases[MAX_PERF_TEST];
+ int page_size;
time_t t;
pattern_seed = (unsigned int) time(&t);
@@ -268,56 +275,59 @@ int main(int argc, char **argv)
ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n",
threshold_mb, pattern_seed);
- struct test test_cases[] = {
- /* Expected mremap failures */
- MAKE_TEST(_4KB, _4KB, _4KB, OVERLAPPING, EXPECT_FAILURE,
- "mremap - Source and Destination Regions Overlapping"),
- MAKE_TEST(_4KB, _1KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE,
- "mremap - Destination Address Misaligned (1KB-aligned)"),
- MAKE_TEST(_1KB, _4KB, _4KB, NON_OVERLAPPING, EXPECT_FAILURE,
- "mremap - Source Address Misaligned (1KB-aligned)"),
-
- /* Src addr PTE aligned */
- MAKE_TEST(PTE, PTE, _8KB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "8KB mremap - Source PTE-aligned, Destination PTE-aligned"),
-
- /* Src addr 1MB aligned */
- MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "2MB mremap - Source 1MB-aligned, Destination PTE-aligned"),
- MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned"),
-
- /* Src addr PMD aligned */
- MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "4MB mremap - Source PMD-aligned, Destination PTE-aligned"),
- MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "4MB mremap - Source PMD-aligned, Destination 1MB-aligned"),
- MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "4MB mremap - Source PMD-aligned, Destination PMD-aligned"),
-
- /* Src addr PUD aligned */
- MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "2GB mremap - Source PUD-aligned, Destination PTE-aligned"),
- MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "2GB mremap - Source PUD-aligned, Destination 1MB-aligned"),
- MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "2GB mremap - Source PUD-aligned, Destination PMD-aligned"),
- MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "2GB mremap - Source PUD-aligned, Destination PUD-aligned"),
- };
-
- struct test perf_test_cases[] = {
- /*
- * mremap 1GB region - Page table level aligned time
- * comparison.
- */
- MAKE_TEST(PTE, PTE, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "1GB mremap - Source PTE-aligned, Destination PTE-aligned"),
- MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "1GB mremap - Source PMD-aligned, Destination PMD-aligned"),
- MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
- "1GB mremap - Source PUD-aligned, Destination PUD-aligned"),
- };
+ page_size = sysconf(_SC_PAGESIZE);
+
+ /* Expected mremap failures */
+ test_cases[0] = MAKE_TEST(page_size, page_size, page_size,
+ OVERLAPPING, EXPECT_FAILURE,
+ "mremap - Source and Destination Regions Overlapping");
+
+ test_cases[1] = MAKE_TEST(page_size, page_size/4, page_size,
+ NON_OVERLAPPING, EXPECT_FAILURE,
+ "mremap - Destination Address Misaligned (1KB-aligned)");
+ test_cases[2] = MAKE_TEST(page_size/4, page_size, page_size,
+ NON_OVERLAPPING, EXPECT_FAILURE,
+ "mremap - Source Address Misaligned (1KB-aligned)");
+
+ /* Src addr PTE aligned */
+ test_cases[3] = MAKE_TEST(PTE, PTE, PTE * 2,
+ NON_OVERLAPPING, EXPECT_SUCCESS,
+ "8KB mremap - Source PTE-aligned, Destination PTE-aligned");
+
+ /* Src addr 1MB aligned */
+ test_cases[4] = MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2MB mremap - Source 1MB-aligned, Destination PTE-aligned");
+ test_cases[5] = MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned");
+
+ /* Src addr PMD aligned */
+ test_cases[6] = MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "4MB mremap - Source PMD-aligned, Destination PTE-aligned");
+ test_cases[7] = MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "4MB mremap - Source PMD-aligned, Destination 1MB-aligned");
+ test_cases[8] = MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "4MB mremap - Source PMD-aligned, Destination PMD-aligned");
+
+ /* Src addr PUD aligned */
+ test_cases[9] = MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination PTE-aligned");
+ test_cases[10] = MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination 1MB-aligned");
+ test_cases[11] = MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination PMD-aligned");
+ test_cases[12] = MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination PUD-aligned");
+
+ perf_test_cases[0] = MAKE_TEST(page_size, page_size, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "1GB mremap - Source PTE-aligned, Destination PTE-aligned");
+ /*
+ * mremap 1GB region - Page table level aligned time
+ * comparison.
+ */
+ perf_test_cases[1] = MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "1GB mremap - Source PMD-aligned, Destination PMD-aligned");
+ perf_test_cases[2] = MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "1GB mremap - Source PUD-aligned, Destination PUD-aligned");
run_perf_tests = (threshold_mb == VALIDATION_NO_THRESHOLD) ||
(threshold_mb * _1MB >= _1GB);
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
index 3be20f5d5275..e4a4ce2b826d 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -126,6 +126,7 @@ static inline u32 pkey_bit_position(int pkey)
#define XSTATE_PKEY_BIT (9)
#define XSTATE_PKEY 0x200
+#define XSTATE_BV_OFFSET 512
int pkey_reg_xstate_offset(void)
{
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index fdbb602ecf32..2d0ae88665db 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -510,7 +510,7 @@ int alloc_pkey(void)
" shadow: 0x%016llx\n",
__func__, __LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
- if (ret) {
+ if (ret > 0) {
/* clear both the bits: */
shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret,
~PKEY_MASK);
@@ -561,7 +561,6 @@ int alloc_random_pkey(void)
int nr_alloced = 0;
int random_index;
memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
- srand((unsigned int)time(NULL));
/* allocate every possible key and make a note of which ones we got */
max_nr_pkey_allocs = NR_PKEYS;
@@ -1278,6 +1277,78 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
}
}
+void arch_force_pkey_reg_init(void)
+{
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ u64 *buf;
+
+ /*
+ * All keys should be allocated and set to allow reads and
+ * writes, so the register should be all 0. If not, just
+ * skip the test.
+ */
+ if (read_pkey_reg())
+ return;
+
+ /*
+ * Just allocate an absurd about of memory rather than
+ * doing the XSAVE size enumeration dance.
+ */
+ buf = mmap(NULL, 1*MB, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+
+ /* These __builtins require compiling with -mxsave */
+
+ /* XSAVE to build a valid buffer: */
+ __builtin_ia32_xsave(buf, XSTATE_PKEY);
+ /* Clear XSTATE_BV[PKRU]: */
+ buf[XSTATE_BV_OFFSET/sizeof(u64)] &= ~XSTATE_PKEY;
+ /* XRSTOR will likely get PKRU back to the init state: */
+ __builtin_ia32_xrstor(buf, XSTATE_PKEY);
+
+ munmap(buf, 1*MB);
+#endif
+}
+
+
+/*
+ * This is mostly useless on ppc for now. But it will not
+ * hurt anything and should give some better coverage as
+ * a long-running test that continually checks the pkey
+ * register.
+ */
+void test_pkey_init_state(int *ptr, u16 pkey)
+{
+ int err;
+ int allocated_pkeys[NR_PKEYS] = {0};
+ int nr_allocated_pkeys = 0;
+ int i;
+
+ for (i = 0; i < NR_PKEYS; i++) {
+ int new_pkey = alloc_pkey();
+
+ if (new_pkey < 0)
+ continue;
+ allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
+ }
+
+ dprintf3("%s()::%d\n", __func__, __LINE__);
+
+ arch_force_pkey_reg_init();
+
+ /*
+ * Loop for a bit, hoping to get exercise the kernel
+ * context switch code.
+ */
+ for (i = 0; i < 1000000; i++)
+ read_pkey_reg();
+
+ for (i = 0; i < nr_allocated_pkeys; i++) {
+ err = sys_pkey_free(allocated_pkeys[i]);
+ pkey_assert(!err);
+ read_pkey_reg(); /* for shadow checking */
+ }
+}
+
/*
* pkey 0 is special. It is allocated by default, so you do not
* have to call pkey_alloc() to use it first. Make sure that it
@@ -1449,6 +1520,13 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
pkey_assert(!ret);
+ /*
+ * Reset the shadow, assuming that the above mprotect()
+ * correctly changed PKRU, but to an unknown value since
+ * the actual alllocated pkey is unknown.
+ */
+ shadow_pkey_reg = __read_pkey_reg();
+
dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
/* Make sure this is an *instruction* fault */
@@ -1502,6 +1580,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
test_implicit_mprotect_exec_only_memory,
test_mprotect_with_pkey_0,
test_ptrace_of_child,
+ test_pkey_init_state,
test_pkey_syscalls_on_non_allocated_pkey,
test_pkey_syscalls_bad_args,
test_pkey_alloc_exhaust,
@@ -1552,6 +1631,8 @@ int main(void)
int nr_iterations = 22;
int pkeys_supported = is_pkeys_supported();
+ srand((unsigned int)time(NULL));
+
setup_handlers();
printf("has pkeys: %d\n", pkeys_supported);
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index e953f3cd9664..45e803af7c77 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -346,4 +346,133 @@ else
exitcode=1
fi
+echo "--------------------------------------------------------"
+echo "running MADV_POPULATE_READ and MADV_POPULATE_WRITE tests"
+echo "--------------------------------------------------------"
+./madv_populate
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
+echo "running memfd_secret test"
+echo "------------------------------------"
+./memfd_secret
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
+echo "-------------------------------------------------------"
+echo "running KSM MADV_MERGEABLE test with 10 identical pages"
+echo "-------------------------------------------------------"
+./ksm_tests -M -p 10
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
+echo "------------------------"
+echo "running KSM unmerge test"
+echo "------------------------"
+./ksm_tests -U
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
+echo "----------------------------------------------------------"
+echo "running KSM test with 10 zero pages and use_zero_pages = 0"
+echo "----------------------------------------------------------"
+./ksm_tests -Z -p 10 -z 0
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
+echo "----------------------------------------------------------"
+echo "running KSM test with 10 zero pages and use_zero_pages = 1"
+echo "----------------------------------------------------------"
+./ksm_tests -Z -p 10 -z 1
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
+echo "-------------------------------------------------------------"
+echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 1"
+echo "-------------------------------------------------------------"
+./ksm_tests -N -m 1
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
+echo "-------------------------------------------------------------"
+echo "running KSM test with 2 NUMA nodes and merge_across_nodes = 0"
+echo "-------------------------------------------------------------"
+./ksm_tests -N -m 0
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
+exit $exitcode
+
exit $exitcode
diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c
index 1af16d2c2a0a..52497b7b9f1d 100644
--- a/tools/testing/selftests/vm/split_huge_page_test.c
+++ b/tools/testing/selftests/vm/split_huge_page_test.c
@@ -341,7 +341,7 @@ void split_file_backed_thp(void)
}
/* write something to the file, so a file-backed THP can be allocated */
- num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc));
+ num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1);
close(fd);
if (num_written < 1) {
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index f5ab5e0312e7..60aa1a4fc69b 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -85,10 +85,12 @@ static bool test_uffdio_wp = false;
static bool test_uffdio_minor = false;
static bool map_shared;
+static int shm_fd;
static int huge_fd;
static char *huge_fd_off0;
static unsigned long long *count_verify;
-static int uffd, uffd_flags, finished, *pipefd;
+static int uffd = -1;
+static int uffd_flags, finished, *pipefd;
static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
static char *zeropage;
pthread_attr_t attr;
@@ -140,11 +142,18 @@ static void usage(void)
exit(1);
}
-#define uffd_error(code, fmt, ...) \
- do { \
- fprintf(stderr, fmt, ##__VA_ARGS__); \
- fprintf(stderr, ": %" PRId64 "\n", (int64_t)(code)); \
- exit(1); \
+#define _err(fmt, ...) \
+ do { \
+ int ret = errno; \
+ fprintf(stderr, "ERROR: " fmt, ##__VA_ARGS__); \
+ fprintf(stderr, " (errno=%d, line=%d)\n", \
+ ret, __LINE__); \
+ } while (0)
+
+#define err(fmt, ...) \
+ do { \
+ _err(fmt, ##__VA_ARGS__); \
+ exit(1); \
} while (0)
static void uffd_stats_reset(struct uffd_stats *uffd_stats,
@@ -171,56 +180,52 @@ static void uffd_stats_report(struct uffd_stats *stats, int n_cpus)
minor_total += stats[i].minor_faults;
}
- printf("userfaults: %llu missing (", miss_total);
- for (i = 0; i < n_cpus; i++)
- printf("%lu+", stats[i].missing_faults);
- printf("\b), %llu wp (", wp_total);
- for (i = 0; i < n_cpus; i++)
- printf("%lu+", stats[i].wp_faults);
- printf("\b), %llu minor (", minor_total);
- for (i = 0; i < n_cpus; i++)
- printf("%lu+", stats[i].minor_faults);
- printf("\b)\n");
+ printf("userfaults: ");
+ if (miss_total) {
+ printf("%llu missing (", miss_total);
+ for (i = 0; i < n_cpus; i++)
+ printf("%lu+", stats[i].missing_faults);
+ printf("\b) ");
+ }
+ if (wp_total) {
+ printf("%llu wp (", wp_total);
+ for (i = 0; i < n_cpus; i++)
+ printf("%lu+", stats[i].wp_faults);
+ printf("\b) ");
+ }
+ if (minor_total) {
+ printf("%llu minor (", minor_total);
+ for (i = 0; i < n_cpus; i++)
+ printf("%lu+", stats[i].minor_faults);
+ printf("\b)");
+ }
+ printf("\n");
}
-static int anon_release_pages(char *rel_area)
+static void anon_release_pages(char *rel_area)
{
- int ret = 0;
-
- if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) {
- perror("madvise");
- ret = 1;
- }
-
- return ret;
+ if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED))
+ err("madvise(MADV_DONTNEED) failed");
}
static void anon_allocate_area(void **alloc_area)
{
- if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) {
- fprintf(stderr, "out of memory\n");
- *alloc_area = NULL;
- }
+ *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (*alloc_area == MAP_FAILED)
+ err("mmap of anonymous memory failed");
}
static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
{
}
-/* HugeTLB memory */
-static int hugetlb_release_pages(char *rel_area)
+static void hugetlb_release_pages(char *rel_area)
{
- int ret = 0;
-
if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- rel_area == huge_fd_off0 ? 0 :
- nr_pages * page_size,
- nr_pages * page_size)) {
- perror("fallocate");
- ret = 1;
- }
-
- return ret;
+ rel_area == huge_fd_off0 ? 0 : nr_pages * page_size,
+ nr_pages * page_size))
+ err("fallocate() failed");
}
static void hugetlb_allocate_area(void **alloc_area)
@@ -233,20 +238,16 @@ static void hugetlb_allocate_area(void **alloc_area)
MAP_HUGETLB,
huge_fd, *alloc_area == area_src ? 0 :
nr_pages * page_size);
- if (*alloc_area == MAP_FAILED) {
- perror("mmap of hugetlbfs file failed");
- goto fail;
- }
+ if (*alloc_area == MAP_FAILED)
+ err("mmap of hugetlbfs file failed");
if (map_shared) {
area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_HUGETLB,
huge_fd, *alloc_area == area_src ? 0 :
nr_pages * page_size);
- if (area_alias == MAP_FAILED) {
- perror("mmap of hugetlb file alias failed");
- goto fail_munmap;
- }
+ if (area_alias == MAP_FAILED)
+ err("mmap of hugetlb file alias failed");
}
if (*alloc_area == area_src) {
@@ -257,16 +258,6 @@ static void hugetlb_allocate_area(void **alloc_area)
}
if (area_alias)
*alloc_area_alias = area_alias;
-
- return;
-
-fail_munmap:
- if (munmap(*alloc_area, nr_pages * page_size) < 0) {
- perror("hugetlb munmap");
- exit(1);
- }
-fail:
- *alloc_area = NULL;
}
static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
@@ -282,33 +273,43 @@ static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset
*start = (unsigned long) area_dst_alias + offset;
}
-/* Shared memory */
-static int shmem_release_pages(char *rel_area)
+static void shmem_release_pages(char *rel_area)
{
- int ret = 0;
-
- if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) {
- perror("madvise");
- ret = 1;
- }
-
- return ret;
+ if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE))
+ err("madvise(MADV_REMOVE) failed");
}
static void shmem_allocate_area(void **alloc_area)
{
+ void *area_alias = NULL;
+ bool is_src = alloc_area == (void **)&area_src;
+ unsigned long offset = is_src ? 0 : nr_pages * page_size;
+
*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_SHARED, -1, 0);
- if (*alloc_area == MAP_FAILED) {
- fprintf(stderr, "shared memory mmap failed\n");
- *alloc_area = NULL;
- }
+ MAP_SHARED, shm_fd, offset);
+ if (*alloc_area == MAP_FAILED)
+ err("mmap of memfd failed");
+
+ area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, shm_fd, offset);
+ if (area_alias == MAP_FAILED)
+ err("mmap of memfd alias failed");
+
+ if (is_src)
+ area_src_alias = area_alias;
+ else
+ area_dst_alias = area_alias;
+}
+
+static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset)
+{
+ *start = (unsigned long)area_dst_alias + offset;
}
struct uffd_test_ops {
unsigned long expected_ioctls;
void (*allocate_area)(void **alloc_area);
- int (*release_pages)(char *rel_area);
+ void (*release_pages)(char *rel_area);
void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
};
@@ -332,7 +333,7 @@ static struct uffd_test_ops shmem_uffd_test_ops = {
.expected_ioctls = SHMEM_EXPECTED_IOCTLS,
.allocate_area = shmem_allocate_area,
.release_pages = shmem_release_pages,
- .alias_mapping = noop_alias_mapping,
+ .alias_mapping = shmem_alias_mapping,
};
static struct uffd_test_ops hugetlb_uffd_test_ops = {
@@ -344,6 +345,128 @@ static struct uffd_test_ops hugetlb_uffd_test_ops = {
static struct uffd_test_ops *uffd_test_ops;
+static void userfaultfd_open(uint64_t *features)
+{
+ struct uffdio_api uffdio_api;
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
+ if (uffd < 0)
+ err("userfaultfd syscall not available in this kernel");
+ uffd_flags = fcntl(uffd, F_GETFD, NULL);
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = *features;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api))
+ err("UFFDIO_API failed.\nPlease make sure to "
+ "run with either root or ptrace capability.");
+ if (uffdio_api.api != UFFD_API)
+ err("UFFDIO_API error: %" PRIu64, (uint64_t)uffdio_api.api);
+
+ *features = uffdio_api.features;
+}
+
+static inline void munmap_area(void **area)
+{
+ if (*area)
+ if (munmap(*area, nr_pages * page_size))
+ err("munmap");
+
+ *area = NULL;
+}
+
+static void uffd_test_ctx_clear(void)
+{
+ size_t i;
+
+ if (pipefd) {
+ for (i = 0; i < nr_cpus * 2; ++i) {
+ if (close(pipefd[i]))
+ err("close pipefd");
+ }
+ free(pipefd);
+ pipefd = NULL;
+ }
+
+ if (count_verify) {
+ free(count_verify);
+ count_verify = NULL;
+ }
+
+ if (uffd != -1) {
+ if (close(uffd))
+ err("close uffd");
+ uffd = -1;
+ }
+
+ huge_fd_off0 = NULL;
+ munmap_area((void **)&area_src);
+ munmap_area((void **)&area_src_alias);
+ munmap_area((void **)&area_dst);
+ munmap_area((void **)&area_dst_alias);
+}
+
+static void uffd_test_ctx_init_ext(uint64_t *features)
+{
+ unsigned long nr, cpu;
+
+ uffd_test_ctx_clear();
+
+ uffd_test_ops->allocate_area((void **)&area_src);
+ uffd_test_ops->allocate_area((void **)&area_dst);
+
+ userfaultfd_open(features);
+
+ count_verify = malloc(nr_pages * sizeof(unsigned long long));
+ if (!count_verify)
+ err("count_verify");
+
+ for (nr = 0; nr < nr_pages; nr++) {
+ *area_mutex(area_src, nr) =
+ (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
+ count_verify[nr] = *area_count(area_src, nr) = 1;
+ /*
+ * In the transition between 255 to 256, powerpc will
+ * read out of order in my_bcmp and see both bytes as
+ * zero, so leave a placeholder below always non-zero
+ * after the count, to avoid my_bcmp to trigger false
+ * positives.
+ */
+ *(area_count(area_src, nr) + 1) = 1;
+ }
+
+ /*
+ * After initialization of area_src, we must explicitly release pages
+ * for area_dst to make sure it's fully empty. Otherwise we could have
+ * some area_dst pages be errornously initialized with zero pages,
+ * hence we could hit memory corruption later in the test.
+ *
+ * One example is when THP is globally enabled, above allocate_area()
+ * calls could have the two areas merged into a single VMA (as they
+ * will have the same VMA flags so they're mergeable). When we
+ * initialize the area_src above, it's possible that some part of
+ * area_dst could have been faulted in via one huge THP that will be
+ * shared between area_src and area_dst. It could cause some of the
+ * area_dst won't be trapped by missing userfaults.
+ *
+ * This release_pages() will guarantee even if that happened, we'll
+ * proactively split the thp and drop any accidentally initialized
+ * pages within area_dst.
+ */
+ uffd_test_ops->release_pages(area_dst);
+
+ pipefd = malloc(sizeof(int) * nr_cpus * 2);
+ if (!pipefd)
+ err("pipefd");
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK))
+ err("pipe");
+}
+
+static inline void uffd_test_ctx_init(uint64_t features)
+{
+ uffd_test_ctx_init_ext(&features);
+}
+
static int my_bcmp(char *str1, char *str2, size_t n)
{
unsigned long i;
@@ -363,27 +486,33 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
/* Undo write-protect, do wakeup after that */
prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
- if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) {
- fprintf(stderr, "clear WP failed for address 0x%" PRIx64 "\n",
- (uint64_t)start);
- exit(1);
- }
+ if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms))
+ err("clear WP failed: address=0x%"PRIx64, (uint64_t)start);
}
static void continue_range(int ufd, __u64 start, __u64 len)
{
struct uffdio_continue req;
+ int ret;
req.range.start = start;
req.range.len = len;
req.mode = 0;
- if (ioctl(ufd, UFFDIO_CONTINUE, &req)) {
- fprintf(stderr,
- "UFFDIO_CONTINUE failed for address 0x%" PRIx64 "\n",
- (uint64_t)start);
- exit(1);
- }
+ if (ioctl(ufd, UFFDIO_CONTINUE, &req))
+ err("UFFDIO_CONTINUE failed for address 0x%" PRIx64,
+ (uint64_t)start);
+
+ /*
+ * Error handling within the kernel for continue is subtly different
+ * from copy or zeropage, so it may be a source of bugs. Trigger an
+ * error (-EEXIST) on purpose, to verify doing so doesn't cause a BUG.
+ */
+ req.mapped = 0;
+ ret = ioctl(ufd, UFFDIO_CONTINUE, &req);
+ if (ret >= 0 || req.mapped != -EEXIST)
+ err("failed to exercise UFFDIO_CONTINUE error handling, ret=%d, mapped=%" PRId64,
+ ret, (int64_t) req.mapped);
}
static void *locking_thread(void *arg)
@@ -395,7 +524,6 @@ static void *locking_thread(void *arg)
unsigned long long count;
char randstate[64];
unsigned int seed;
- time_t start;
if (bounces & BOUNCE_RANDOM) {
seed = (unsigned int) time(NULL) - bounces;
@@ -403,10 +531,8 @@ static void *locking_thread(void *arg)
seed += cpu;
bzero(&rand, sizeof(rand));
bzero(&randstate, sizeof(randstate));
- if (initstate_r(seed, randstate, sizeof(randstate), &rand)) {
- fprintf(stderr, "srandom_r error\n");
- exit(1);
- }
+ if (initstate_r(seed, randstate, sizeof(randstate), &rand))
+ err("initstate_r failed");
} else {
page_nr = -bounces;
if (!(bounces & BOUNCE_RACINGFAULTS))
@@ -415,92 +541,26 @@ static void *locking_thread(void *arg)
while (!finished) {
if (bounces & BOUNCE_RANDOM) {
- if (random_r(&rand, &rand_nr)) {
- fprintf(stderr, "random_r 1 error\n");
- exit(1);
- }
+ if (random_r(&rand, &rand_nr))
+ err("random_r failed");
page_nr = rand_nr;
if (sizeof(page_nr) > sizeof(rand_nr)) {
- if (random_r(&rand, &rand_nr)) {
- fprintf(stderr, "random_r 2 error\n");
- exit(1);
- }
+ if (random_r(&rand, &rand_nr))
+ err("random_r failed");
page_nr |= (((unsigned long) rand_nr) << 16) <<
16;
}
} else
page_nr += 1;
page_nr %= nr_pages;
-
- start = time(NULL);
- if (bounces & BOUNCE_VERIFY) {
- count = *area_count(area_dst, page_nr);
- if (!count) {
- fprintf(stderr,
- "page_nr %lu wrong count %Lu %Lu\n",
- page_nr, count,
- count_verify[page_nr]);
- exit(1);
- }
-
-
- /*
- * We can't use bcmp (or memcmp) because that
- * returns 0 erroneously if the memory is
- * changing under it (even if the end of the
- * page is never changing and always
- * different).
- */
-#if 1
- if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
- page_size)) {
- fprintf(stderr,
- "my_bcmp page_nr %lu wrong count %Lu %Lu\n",
- page_nr, count, count_verify[page_nr]);
- exit(1);
- }
-#else
- unsigned long loops;
-
- loops = 0;
- /* uncomment the below line to test with mutex */
- /* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */
- while (!bcmp(area_dst + page_nr * page_size, zeropage,
- page_size)) {
- loops += 1;
- if (loops > 10)
- break;
- }
- /* uncomment below line to test with mutex */
- /* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */
- if (loops) {
- fprintf(stderr,
- "page_nr %lu all zero thread %lu %p %lu\n",
- page_nr, cpu, area_dst + page_nr * page_size,
- loops);
- if (loops > 10)
- exit(1);
- }
-#endif
- }
-
pthread_mutex_lock(area_mutex(area_dst, page_nr));
count = *area_count(area_dst, page_nr);
- if (count != count_verify[page_nr]) {
- fprintf(stderr,
- "page_nr %lu memory corruption %Lu %Lu\n",
- page_nr, count,
- count_verify[page_nr]); exit(1);
- }
+ if (count != count_verify[page_nr])
+ err("page_nr %lu memory corruption %llu %llu",
+ page_nr, count, count_verify[page_nr]);
count++;
*area_count(area_dst, page_nr) = count_verify[page_nr] = count;
pthread_mutex_unlock(area_mutex(area_dst, page_nr));
-
- if (time(NULL) - start > 1)
- fprintf(stderr,
- "userfault too slow %ld "
- "possible false positive with overcommit\n",
- time(NULL) - start);
}
return NULL;
@@ -514,22 +574,33 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
offset);
if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
/* real retval in ufdio_copy.copy */
- if (uffdio_copy->copy != -EEXIST) {
- uffd_error(uffdio_copy->copy,
- "UFFDIO_COPY retry error");
- }
- } else
- uffd_error(uffdio_copy->copy, "UFFDIO_COPY retry unexpected");
+ if (uffdio_copy->copy != -EEXIST)
+ err("UFFDIO_COPY retry error: %"PRId64,
+ (int64_t)uffdio_copy->copy);
+ } else {
+ err("UFFDIO_COPY retry unexpected: %"PRId64,
+ (int64_t)uffdio_copy->copy);
+ }
+}
+
+static void wake_range(int ufd, unsigned long addr, unsigned long len)
+{
+ struct uffdio_range uffdio_wake;
+
+ uffdio_wake.start = addr;
+ uffdio_wake.len = len;
+
+ if (ioctl(ufd, UFFDIO_WAKE, &uffdio_wake))
+ fprintf(stderr, "error waking %lu\n",
+ addr), exit(1);
}
static int __copy_page(int ufd, unsigned long offset, bool retry)
{
struct uffdio_copy uffdio_copy;
- if (offset >= nr_pages * page_size) {
- fprintf(stderr, "unexpected offset %lu\n", offset);
- exit(1);
- }
+ if (offset >= nr_pages * page_size)
+ err("unexpected offset %lu\n", offset);
uffdio_copy.dst = (unsigned long) area_dst + offset;
uffdio_copy.src = (unsigned long) area_src + offset;
uffdio_copy.len = page_size;
@@ -541,9 +612,11 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
/* real retval in ufdio_copy.copy */
if (uffdio_copy.copy != -EEXIST)
- uffd_error(uffdio_copy.copy, "UFFDIO_COPY error");
+ err("UFFDIO_COPY error: %"PRId64,
+ (int64_t)uffdio_copy.copy);
+ wake_range(ufd, uffdio_copy.dst, page_size);
} else if (uffdio_copy.copy != page_size) {
- uffd_error(uffdio_copy.copy, "UFFDIO_COPY unexpected copy");
+ err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy);
} else {
if (test_uffdio_copy_eexist && retry) {
test_uffdio_copy_eexist = false;
@@ -572,11 +645,10 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg)
if (ret < 0) {
if (errno == EAGAIN)
return 1;
- perror("blocking read error");
+ err("blocking read error");
} else {
- fprintf(stderr, "short read\n");
+ err("short read");
}
- exit(1);
}
return 0;
@@ -587,10 +659,8 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
{
unsigned long offset;
- if (msg->event != UFFD_EVENT_PAGEFAULT) {
- fprintf(stderr, "unexpected msg event %u\n", msg->event);
- exit(1);
- }
+ if (msg->event != UFFD_EVENT_PAGEFAULT)
+ err("unexpected msg event %u", msg->event);
if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
/* Write protect page faults */
@@ -621,11 +691,8 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
stats->minor_faults++;
} else {
/* Missing page faults */
- if (bounces & BOUNCE_VERIFY &&
- msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) {
- fprintf(stderr, "unexpected write fault\n");
- exit(1);
- }
+ if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+ err("unexpected write fault");
offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
offset &= ~(page_size-1);
@@ -652,32 +719,20 @@ static void *uffd_poll_thread(void *arg)
for (;;) {
ret = poll(pollfd, 2, -1);
- if (!ret) {
- fprintf(stderr, "poll error %d\n", ret);
- exit(1);
- }
- if (ret < 0) {
- perror("poll");
- exit(1);
- }
+ if (ret <= 0)
+ err("poll error: %d", ret);
if (pollfd[1].revents & POLLIN) {
- if (read(pollfd[1].fd, &tmp_chr, 1) != 1) {
- fprintf(stderr, "read pipefd error\n");
- exit(1);
- }
+ if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
+ err("read pipefd error");
break;
}
- if (!(pollfd[0].revents & POLLIN)) {
- fprintf(stderr, "pollfd[0].revents %d\n",
- pollfd[0].revents);
- exit(1);
- }
+ if (!(pollfd[0].revents & POLLIN))
+ err("pollfd[0].revents %d", pollfd[0].revents);
if (uffd_read_msg(uffd, &msg))
continue;
switch (msg.event) {
default:
- fprintf(stderr, "unexpected msg event %u\n",
- msg.event); exit(1);
+ err("unexpected msg event %u\n", msg.event);
break;
case UFFD_EVENT_PAGEFAULT:
uffd_handle_page_fault(&msg, stats);
@@ -691,10 +746,8 @@ static void *uffd_poll_thread(void *arg)
uffd_reg.range.start = msg.arg.remove.start;
uffd_reg.range.len = msg.arg.remove.end -
msg.arg.remove.start;
- if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) {
- fprintf(stderr, "remove failure\n");
- exit(1);
- }
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
+ err("remove failure");
break;
case UFFD_EVENT_REMAP:
area_dst = (char *)(unsigned long)msg.arg.remap.to;
@@ -797,9 +850,7 @@ static int stress(struct uffd_stats *uffd_stats)
* UFFDIO_COPY without writing zero pages into area_dst
* because the background threads already completed).
*/
- if (uffd_test_ops->release_pages(area_src))
- return 1;
-
+ uffd_test_ops->release_pages(area_src);
finished = 1;
for (cpu = 0; cpu < nr_cpus; cpu++)
@@ -809,10 +860,8 @@ static int stress(struct uffd_stats *uffd_stats)
for (cpu = 0; cpu < nr_cpus; cpu++) {
char c;
if (bounces & BOUNCE_POLL) {
- if (write(pipefd[cpu*2+1], &c, 1) != 1) {
- fprintf(stderr, "pipefd write error\n");
- return 1;
- }
+ if (write(pipefd[cpu*2+1], &c, 1) != 1)
+ err("pipefd write error");
if (pthread_join(uffd_threads[cpu],
(void *)&uffd_stats[cpu]))
return 1;
@@ -827,40 +876,6 @@ static int stress(struct uffd_stats *uffd_stats)
return 0;
}
-static int userfaultfd_open_ext(uint64_t *features)
-{
- struct uffdio_api uffdio_api;
-
- uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
- if (uffd < 0) {
- fprintf(stderr,
- "userfaultfd syscall not available in this kernel\n");
- return 1;
- }
- uffd_flags = fcntl(uffd, F_GETFD, NULL);
-
- uffdio_api.api = UFFD_API;
- uffdio_api.features = *features;
- if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
- fprintf(stderr, "UFFDIO_API failed.\nPlease make sure to "
- "run with either root or ptrace capability.\n");
- return 1;
- }
- if (uffdio_api.api != UFFD_API) {
- fprintf(stderr, "UFFDIO_API error: %" PRIu64 "\n",
- (uint64_t)uffdio_api.api);
- return 1;
- }
-
- *features = uffdio_api.features;
- return 0;
-}
-
-static int userfaultfd_open(uint64_t features)
-{
- return userfaultfd_open_ext(&features);
-}
-
sigjmp_buf jbuf, *sigbuf;
static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
@@ -912,10 +927,8 @@ static int faulting_process(int signal_test)
memset(&act, 0, sizeof(act));
act.sa_sigaction = sighndl;
act.sa_flags = SA_SIGINFO;
- if (sigaction(SIGBUS, &act, 0)) {
- perror("sigaction");
- return 1;
- }
+ if (sigaction(SIGBUS, &act, 0))
+ err("sigaction");
lastnr = (unsigned long)-1;
}
@@ -925,10 +938,8 @@ static int faulting_process(int signal_test)
if (signal_test) {
if (sigsetjmp(*sigbuf, 1) != 0) {
- if (steps == 1 && nr == lastnr) {
- fprintf(stderr, "Signal repeated\n");
- return 1;
- }
+ if (steps == 1 && nr == lastnr)
+ err("Signal repeated");
lastnr = nr;
if (signal_test == 1) {
@@ -953,12 +964,9 @@ static int faulting_process(int signal_test)
}
count = *area_count(area_dst, nr);
- if (count != count_verify[nr]) {
- fprintf(stderr,
- "nr %lu memory corruption %Lu %Lu\n",
- nr, count,
- count_verify[nr]);
- }
+ if (count != count_verify[nr])
+ err("nr %lu memory corruption %llu %llu\n",
+ nr, count, count_verify[nr]);
/*
* Trigger write protection if there is by writing
* the same value back.
@@ -974,18 +982,16 @@ static int faulting_process(int signal_test)
area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
- if (area_dst == MAP_FAILED) {
- perror("mremap");
- exit(1);
- }
+ if (area_dst == MAP_FAILED)
+ err("mremap");
+ /* Reset area_src since we just clobbered it */
+ area_src = NULL;
for (; nr < nr_pages; nr++) {
count = *area_count(area_dst, nr);
if (count != count_verify[nr]) {
- fprintf(stderr,
- "nr %lu memory corruption %Lu %Lu\n",
- nr, count,
- count_verify[nr]); exit(1);
+ err("nr %lu memory corruption %llu %llu\n",
+ nr, count, count_verify[nr]);
}
/*
* Trigger write protection if there is by writing
@@ -994,15 +1000,11 @@ static int faulting_process(int signal_test)
*area_count(area_dst, nr) = count;
}
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
+ uffd_test_ops->release_pages(area_dst);
- for (nr = 0; nr < nr_pages; nr++) {
- if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) {
- fprintf(stderr, "nr %lu is not zero\n", nr);
- exit(1);
- }
- }
+ for (nr = 0; nr < nr_pages; nr++)
+ if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
+ err("nr %lu is not zero", nr);
return 0;
}
@@ -1015,13 +1017,12 @@ static void retry_uffdio_zeropage(int ufd,
uffdio_zeropage->range.len,
offset);
if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
- if (uffdio_zeropage->zeropage != -EEXIST) {
- uffd_error(uffdio_zeropage->zeropage,
- "UFFDIO_ZEROPAGE retry error");
- }
+ if (uffdio_zeropage->zeropage != -EEXIST)
+ err("UFFDIO_ZEROPAGE error: %"PRId64,
+ (int64_t)uffdio_zeropage->zeropage);
} else {
- uffd_error(uffdio_zeropage->zeropage,
- "UFFDIO_ZEROPAGE retry unexpected");
+ err("UFFDIO_ZEROPAGE error: %"PRId64,
+ (int64_t)uffdio_zeropage->zeropage);
}
}
@@ -1034,10 +1035,8 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
- if (offset >= nr_pages * page_size) {
- fprintf(stderr, "unexpected offset %lu\n", offset);
- exit(1);
- }
+ if (offset >= nr_pages * page_size)
+ err("unexpected offset %lu", offset);
uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
uffdio_zeropage.range.len = page_size;
uffdio_zeropage.mode = 0;
@@ -1045,14 +1044,13 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
res = uffdio_zeropage.zeropage;
if (ret) {
/* real retval in ufdio_zeropage.zeropage */
- if (has_zeropage) {
- uffd_error(res, "UFFDIO_ZEROPAGE %s",
- res == -EEXIST ? "-EEXIST" : "error");
- } else if (res != -EINVAL)
- uffd_error(res, "UFFDIO_ZEROPAGE not -EINVAL");
+ if (has_zeropage)
+ err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)res);
+ else if (res != -EINVAL)
+ err("UFFDIO_ZEROPAGE not -EINVAL");
} else if (has_zeropage) {
if (res != page_size) {
- uffd_error(res, "UFFDIO_ZEROPAGE unexpected");
+ err("UFFDIO_ZEROPAGE unexpected size");
} else {
if (test_uffdio_zeropage_eexist && retry) {
test_uffdio_zeropage_eexist = false;
@@ -1062,7 +1060,7 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
return 1;
}
} else
- uffd_error(res, "UFFDIO_ZEROPAGE succeeded");
+ err("UFFDIO_ZEROPAGE succeeded");
return 0;
}
@@ -1081,37 +1079,24 @@ static int userfaultfd_zeropage_test(void)
printf("testing UFFDIO_ZEROPAGE: ");
fflush(stdout);
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
+ uffd_test_ctx_init(0);
- if (userfaultfd_open(0))
- return 1;
uffdio_register.range.start = (unsigned long) area_dst;
uffdio_register.range.len = nr_pages * page_size;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
- fprintf(stderr, "register failure\n");
- exit(1);
- }
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ err("register failure");
expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls) {
- fprintf(stderr,
- "unexpected missing ioctl for anon memory\n");
- exit(1);
- }
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls)
+ err("unexpected missing ioctl for anon memory");
- if (uffdio_zeropage(uffd, 0)) {
- if (my_bcmp(area_dst, zeropage, page_size)) {
- fprintf(stderr, "zeropage is not zero\n");
- exit(1);
- }
- }
+ if (uffdio_zeropage(uffd, 0))
+ if (my_bcmp(area_dst, zeropage, page_size))
+ err("zeropage is not zero");
- close(uffd);
printf("done.\n");
return 0;
}
@@ -1129,13 +1114,10 @@ static int userfaultfd_events_test(void)
printf("testing events (fork, remap, remove): ");
fflush(stdout);
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
-
features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
UFFD_FEATURE_EVENT_REMOVE;
- if (userfaultfd_open(features))
- return 1;
+ uffd_test_ctx_init(features);
+
fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
uffdio_register.range.start = (unsigned long) area_dst;
@@ -1143,46 +1125,31 @@ static int userfaultfd_events_test(void)
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
- fprintf(stderr, "register failure\n");
- exit(1);
- }
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ err("register failure");
expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
- fprintf(stderr, "unexpected missing ioctl for anon memory\n");
- exit(1);
- }
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls)
+ err("unexpected missing ioctl for anon memory");
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
- perror("uffd_poll_thread create");
- exit(1);
- }
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
+ err("uffd_poll_thread create");
pid = fork();
- if (pid < 0) {
- perror("fork");
- exit(1);
- }
+ if (pid < 0)
+ err("fork");
if (!pid)
exit(faulting_process(0));
waitpid(pid, &err, 0);
- if (err) {
- fprintf(stderr, "faulting process failed\n");
- exit(1);
- }
-
- if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
- perror("pipe write");
- exit(1);
- }
+ if (err)
+ err("faulting process failed");
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
if (pthread_join(uffd_mon, NULL))
return 1;
- close(uffd);
-
uffd_stats_report(&stats, 1);
return stats.missing_faults != nr_pages;
@@ -1202,12 +1169,9 @@ static int userfaultfd_sig_test(void)
printf("testing signal delivery: ");
fflush(stdout);
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
-
features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
- if (userfaultfd_open(features))
- return 1;
+ uffd_test_ctx_init(features);
+
fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
uffdio_register.range.start = (unsigned long) area_dst;
@@ -1215,57 +1179,40 @@ static int userfaultfd_sig_test(void)
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
- fprintf(stderr, "register failure\n");
- exit(1);
- }
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ err("register failure");
expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
- fprintf(stderr, "unexpected missing ioctl for anon memory\n");
- exit(1);
- }
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls)
+ err("unexpected missing ioctl for anon memory");
- if (faulting_process(1)) {
- fprintf(stderr, "faulting process failed\n");
- exit(1);
- }
+ if (faulting_process(1))
+ err("faulting process failed");
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
+ uffd_test_ops->release_pages(area_dst);
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
- perror("uffd_poll_thread create");
- exit(1);
- }
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
+ err("uffd_poll_thread create");
pid = fork();
- if (pid < 0) {
- perror("fork");
- exit(1);
- }
+ if (pid < 0)
+ err("fork");
if (!pid)
exit(faulting_process(2));
waitpid(pid, &err, 0);
- if (err) {
- fprintf(stderr, "faulting process failed\n");
- exit(1);
- }
-
- if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
- perror("pipe write");
- exit(1);
- }
+ if (err)
+ err("faulting process failed");
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
if (pthread_join(uffd_mon, (void **)&userfaults))
return 1;
printf("done.\n");
if (userfaults)
- fprintf(stderr, "Signal test failed, userfaults: %ld\n",
- userfaults);
- close(uffd);
+ err("Signal test failed, userfaults: %ld", userfaults);
+
return userfaults != 0;
}
@@ -1279,7 +1226,7 @@ static int userfaultfd_minor_test(void)
void *expected_page;
char c;
struct uffd_stats stats = { 0 };
- uint64_t features = UFFD_FEATURE_MINOR_HUGETLBFS;
+ uint64_t req_features, features_out;
if (!test_uffdio_minor)
return 0;
@@ -1287,13 +1234,17 @@ static int userfaultfd_minor_test(void)
printf("testing minor faults: ");
fflush(stdout);
- if (uffd_test_ops->release_pages(area_dst))
+ if (test_type == TEST_HUGETLB)
+ req_features = UFFD_FEATURE_MINOR_HUGETLBFS;
+ else if (test_type == TEST_SHMEM)
+ req_features = UFFD_FEATURE_MINOR_SHMEM;
+ else
return 1;
- if (userfaultfd_open_ext(&features))
- return 1;
- /* If kernel reports the feature isn't supported, skip the test. */
- if (!(features & UFFD_FEATURE_MINOR_HUGETLBFS)) {
+ features_out = req_features;
+ uffd_test_ctx_init_ext(&features_out);
+ /* If kernel reports required features aren't supported, skip test. */
+ if ((features_out & req_features) != req_features) {
printf("skipping test due to lack of feature support\n");
fflush(stdout);
return 0;
@@ -1302,17 +1253,13 @@ static int userfaultfd_minor_test(void)
uffdio_register.range.start = (unsigned long)area_dst_alias;
uffdio_register.range.len = nr_pages * page_size;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MINOR;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
- fprintf(stderr, "register failure\n");
- exit(1);
- }
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ err("register failure");
expected_ioctls = uffd_test_ops->expected_ioctls;
expected_ioctls |= 1 << _UFFDIO_CONTINUE;
- if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
- fprintf(stderr, "unexpected missing ioctl(s)\n");
- exit(1);
- }
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls)
+ err("unexpected missing ioctl(s)");
/*
* After registering with UFFD, populate the non-UFFD-registered side of
@@ -1323,10 +1270,8 @@ static int userfaultfd_minor_test(void)
page_size);
}
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
- perror("uffd_poll_thread create");
- exit(1);
- }
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
+ err("uffd_poll_thread create");
/*
* Read each of the pages back using the UFFD-registered mapping. We
@@ -1335,92 +1280,173 @@ static int userfaultfd_minor_test(void)
* page's contents, and then issuing a CONTINUE ioctl.
*/
- if (posix_memalign(&expected_page, page_size, page_size)) {
- fprintf(stderr, "out of memory\n");
- return 1;
- }
+ if (posix_memalign(&expected_page, page_size, page_size))
+ err("out of memory");
for (p = 0; p < nr_pages; ++p) {
expected_byte = ~((uint8_t)(p % ((uint8_t)-1)));
memset(expected_page, expected_byte, page_size);
if (my_bcmp(expected_page, area_dst_alias + (p * page_size),
- page_size)) {
- fprintf(stderr,
- "unexpected page contents after minor fault\n");
- exit(1);
- }
+ page_size))
+ err("unexpected page contents after minor fault");
}
- if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
- perror("pipe write");
- exit(1);
- }
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
if (pthread_join(uffd_mon, NULL))
return 1;
- close(uffd);
-
uffd_stats_report(&stats, 1);
return stats.missing_faults != 0 || stats.minor_faults != nr_pages;
}
-static int userfaultfd_stress(void)
+#define BIT_ULL(nr) (1ULL << (nr))
+#define PM_SOFT_DIRTY BIT_ULL(55)
+#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
+#define PM_UFFD_WP BIT_ULL(57)
+#define PM_FILE BIT_ULL(61)
+#define PM_SWAP BIT_ULL(62)
+#define PM_PRESENT BIT_ULL(63)
+
+static int pagemap_open(void)
{
- void *area;
- char *tmp_area;
- unsigned long nr;
- struct uffdio_register uffdio_register;
- unsigned long cpu;
- int err;
- struct uffd_stats uffd_stats[nr_cpus];
+ int fd = open("/proc/self/pagemap", O_RDONLY);
- uffd_test_ops->allocate_area((void **)&area_src);
- if (!area_src)
- return 1;
- uffd_test_ops->allocate_area((void **)&area_dst);
- if (!area_dst)
- return 1;
+ if (fd < 0)
+ err("open pagemap");
- if (userfaultfd_open(0))
- return 1;
+ return fd;
+}
- count_verify = malloc(nr_pages * sizeof(unsigned long long));
- if (!count_verify) {
- perror("count_verify");
- return 1;
- }
+static uint64_t pagemap_read_vaddr(int fd, void *vaddr)
+{
+ uint64_t value;
+ int ret;
- for (nr = 0; nr < nr_pages; nr++) {
- *area_mutex(area_src, nr) = (pthread_mutex_t)
- PTHREAD_MUTEX_INITIALIZER;
- count_verify[nr] = *area_count(area_src, nr) = 1;
+ ret = pread(fd, &value, sizeof(uint64_t),
+ ((uint64_t)vaddr >> 12) * sizeof(uint64_t));
+ if (ret != sizeof(uint64_t))
+ err("pread() on pagemap failed");
+
+ return value;
+}
+
+/* This macro let __LINE__ works in err() */
+#define pagemap_check_wp(value, wp) do { \
+ if (!!(value & PM_UFFD_WP) != wp) \
+ err("pagemap uffd-wp bit error: 0x%"PRIx64, value); \
+ } while (0)
+
+static int pagemap_test_fork(bool present)
+{
+ pid_t child = fork();
+ uint64_t value;
+ int fd, result;
+
+ if (!child) {
+ /* Open the pagemap fd of the child itself */
+ fd = pagemap_open();
+ value = pagemap_read_vaddr(fd, area_dst);
/*
- * In the transition between 255 to 256, powerpc will
- * read out of order in my_bcmp and see both bytes as
- * zero, so leave a placeholder below always non-zero
- * after the count, to avoid my_bcmp to trigger false
- * positives.
+ * After fork() uffd-wp bit should be gone as long as we're
+ * without UFFD_FEATURE_EVENT_FORK
*/
- *(area_count(area_src, nr) + 1) = 1;
+ pagemap_check_wp(value, false);
+ /* Succeed */
+ exit(0);
}
+ waitpid(child, &result, 0);
+ return result;
+}
- pipefd = malloc(sizeof(int) * nr_cpus * 2);
- if (!pipefd) {
- perror("pipefd");
- return 1;
- }
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) {
- perror("pipe");
- return 1;
- }
- }
+static void userfaultfd_pagemap_test(unsigned int test_pgsize)
+{
+ struct uffdio_register uffdio_register;
+ int pagemap_fd;
+ uint64_t value;
- if (posix_memalign(&area, page_size, page_size)) {
- fprintf(stderr, "out of memory\n");
- return 1;
+ /* Pagemap tests uffd-wp only */
+ if (!test_uffdio_wp)
+ return;
+
+ /* Not enough memory to test this page size */
+ if (test_pgsize > nr_pages * page_size)
+ return;
+
+ printf("testing uffd-wp with pagemap (pgsize=%u): ", test_pgsize);
+ /* Flush so it doesn't flush twice in parent/child later */
+ fflush(stdout);
+
+ uffd_test_ctx_init(0);
+
+ if (test_pgsize > page_size) {
+ /* This is a thp test */
+ if (madvise(area_dst, nr_pages * page_size, MADV_HUGEPAGE))
+ err("madvise(MADV_HUGEPAGE) failed");
+ } else if (test_pgsize == page_size) {
+ /* This is normal page test; force no thp */
+ if (madvise(area_dst, nr_pages * page_size, MADV_NOHUGEPAGE))
+ err("madvise(MADV_NOHUGEPAGE) failed");
}
+
+ uffdio_register.range.start = (unsigned long) area_dst;
+ uffdio_register.range.len = nr_pages * page_size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ err("register failed");
+
+ pagemap_fd = pagemap_open();
+
+ /* Touch the page */
+ *area_dst = 1;
+ wp_range(uffd, (uint64_t)area_dst, test_pgsize, true);
+ value = pagemap_read_vaddr(pagemap_fd, area_dst);
+ pagemap_check_wp(value, true);
+ /* Make sure uffd-wp bit dropped when fork */
+ if (pagemap_test_fork(true))
+ err("Detected stall uffd-wp bit in child");
+
+ /* Exclusive required or PAGEOUT won't work */
+ if (!(value & PM_MMAP_EXCLUSIVE))
+ err("multiple mapping detected: 0x%"PRIx64, value);
+
+ if (madvise(area_dst, test_pgsize, MADV_PAGEOUT))
+ err("madvise(MADV_PAGEOUT) failed");
+
+ /* Uffd-wp should persist even swapped out */
+ value = pagemap_read_vaddr(pagemap_fd, area_dst);
+ pagemap_check_wp(value, true);
+ /* Make sure uffd-wp bit dropped when fork */
+ if (pagemap_test_fork(false))
+ err("Detected stall uffd-wp bit in child");
+
+ /* Unprotect; this tests swap pte modifications */
+ wp_range(uffd, (uint64_t)area_dst, page_size, false);
+ value = pagemap_read_vaddr(pagemap_fd, area_dst);
+ pagemap_check_wp(value, false);
+
+ /* Fault in the page from disk */
+ *area_dst = 2;
+ value = pagemap_read_vaddr(pagemap_fd, area_dst);
+ pagemap_check_wp(value, false);
+
+ close(pagemap_fd);
+ printf("done\n");
+}
+
+static int userfaultfd_stress(void)
+{
+ void *area;
+ char *tmp_area;
+ unsigned long nr;
+ struct uffdio_register uffdio_register;
+ struct uffd_stats uffd_stats[nr_cpus];
+
+ uffd_test_ctx_init(0);
+
+ if (posix_memalign(&area, page_size, page_size))
+ err("out of memory");
zeropage = area;
bzero(zeropage, page_size);
@@ -1429,7 +1455,6 @@ static int userfaultfd_stress(void)
pthread_attr_init(&attr);
pthread_attr_setstacksize(&attr, 16*1024*1024);
- err = 0;
while (bounces--) {
unsigned long expected_ioctls;
@@ -1458,25 +1483,18 @@ static int userfaultfd_stress(void)
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
- fprintf(stderr, "register failure\n");
- return 1;
- }
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ err("register failure");
expected_ioctls = uffd_test_ops->expected_ioctls;
if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls) {
- fprintf(stderr,
- "unexpected missing ioctl for anon memory\n");
- return 1;
- }
+ expected_ioctls)
+ err("unexpected missing ioctl for anon memory");
if (area_dst_alias) {
uffdio_register.range.start = (unsigned long)
area_dst_alias;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
- fprintf(stderr, "register failure alias\n");
- return 1;
- }
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ err("register failure alias");
}
/*
@@ -1503,8 +1521,7 @@ static int userfaultfd_stress(void)
* MADV_DONTNEED only after the UFFDIO_REGISTER, so it's
* required to MADV_DONTNEED here.
*/
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
+ uffd_test_ops->release_pages(area_dst);
uffd_stats_reset(uffd_stats, nr_cpus);
@@ -1518,33 +1535,22 @@ static int userfaultfd_stress(void)
nr_pages * page_size, false);
/* unregister */
- if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
- fprintf(stderr, "unregister failure\n");
- return 1;
- }
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range))
+ err("unregister failure");
if (area_dst_alias) {
uffdio_register.range.start = (unsigned long) area_dst;
if (ioctl(uffd, UFFDIO_UNREGISTER,
- &uffdio_register.range)) {
- fprintf(stderr, "unregister failure alias\n");
- return 1;
- }
+ &uffdio_register.range))
+ err("unregister failure alias");
}
/* verification */
- if (bounces & BOUNCE_VERIFY) {
- for (nr = 0; nr < nr_pages; nr++) {
- if (*area_count(area_dst, nr) != count_verify[nr]) {
- fprintf(stderr,
- "error area_count %Lu %Lu %lu\n",
- *area_count(area_src, nr),
- count_verify[nr],
- nr);
- err = 1;
- bounces = 0;
- }
- }
- }
+ if (bounces & BOUNCE_VERIFY)
+ for (nr = 0; nr < nr_pages; nr++)
+ if (*area_count(area_dst, nr) != count_verify[nr])
+ err("error area_count %llu %llu %lu\n",
+ *area_count(area_src, nr),
+ count_verify[nr], nr);
/* prepare next bounce */
tmp_area = area_src;
@@ -1558,10 +1564,21 @@ static int userfaultfd_stress(void)
uffd_stats_report(uffd_stats, nr_cpus);
}
- if (err)
- return err;
+ if (test_type == TEST_ANON) {
+ /*
+ * shmem/hugetlb won't be able to run since they have different
+ * behavior on fork() (file-backed memory normally drops ptes
+ * directly when fork), meanwhile the pagemap test will verify
+ * pgtable entry of fork()ed child.
+ */
+ userfaultfd_pagemap_test(page_size);
+ /*
+ * Hard-code for x86_64 for now for 2M THP, as x86_64 is
+ * currently the only one that supports uffd-wp
+ */
+ userfaultfd_pagemap_test(page_size * 512);
+ }
- close(uffd);
return userfaultfd_zeropage_test() || userfaultfd_sig_test()
|| userfaultfd_events_test() || userfaultfd_minor_test();
}
@@ -1610,8 +1627,9 @@ static void set_test_type(const char *type)
map_shared = true;
test_type = TEST_SHMEM;
uffd_test_ops = &shmem_uffd_test_ops;
+ test_uffdio_minor = true;
} else {
- fprintf(stderr, "Unknown test type: %s\n", type); exit(1);
+ err("Unknown test type: %s", type);
}
if (test_type == TEST_HUGETLB)
@@ -1619,15 +1637,11 @@ static void set_test_type(const char *type)
else
page_size = sysconf(_SC_PAGE_SIZE);
- if (!page_size) {
- fprintf(stderr, "Unable to determine page size\n");
- exit(2);
- }
+ if (!page_size)
+ err("Unable to determine page size");
if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
- > page_size) {
- fprintf(stderr, "Impossible to run this test\n");
- exit(2);
- }
+ > page_size)
+ err("Impossible to run this test");
}
static void sigalrm(int sig)
@@ -1644,10 +1658,8 @@ int main(int argc, char **argv)
if (argc < 4)
usage();
- if (signal(SIGALRM, sigalrm) == SIG_ERR) {
- fprintf(stderr, "failed to arm SIGALRM");
- exit(1);
- }
+ if (signal(SIGALRM, sigalrm) == SIG_ERR)
+ err("failed to arm SIGALRM");
alarm(ALARM_INTERVAL_SECS);
set_test_type(argv[1]);
@@ -1656,13 +1668,13 @@ int main(int argc, char **argv)
nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size /
nr_cpus;
if (!nr_pages_per_cpu) {
- fprintf(stderr, "invalid MiB\n");
+ _err("invalid MiB");
usage();
}
bounces = atoi(argv[3]);
if (bounces <= 0) {
- fprintf(stderr, "invalid bounces\n");
+ _err("invalid bounces");
usage();
}
nr_pages = nr_pages_per_cpu * nr_cpus;
@@ -1671,16 +1683,20 @@ int main(int argc, char **argv)
if (argc < 5)
usage();
huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
- if (huge_fd < 0) {
- fprintf(stderr, "Open of %s failed", argv[3]);
- perror("open");
- exit(1);
- }
- if (ftruncate(huge_fd, 0)) {
- fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]);
- perror("ftruncate");
- exit(1);
- }
+ if (huge_fd < 0)
+ err("Open of %s failed", argv[4]);
+ if (ftruncate(huge_fd, 0))
+ err("ftruncate %s to size 0 failed", argv[4]);
+ } else if (test_type == TEST_SHMEM) {
+ shm_fd = memfd_create(argv[0], 0);
+ if (shm_fd < 0)
+ err("memfd_create");
+ if (ftruncate(shm_fd, nr_pages * page_size * 2))
+ err("ftruncate");
+ if (fallocate(shm_fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
+ nr_pages * page_size * 2))
+ err("fallocate");
}
printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
nr_pages, nr_pages_per_cpu);
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 333980375bc7..b4142cd1c5c2 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -13,11 +13,12 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl ioperm \
test_vsyscall mov_ss_trap \
- syscall_arg_fault fsgsbase_restore
+ syscall_arg_fault fsgsbase_restore sigaltstack
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
+ corrupt_xstate_header
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
diff --git a/tools/testing/selftests/x86/corrupt_xstate_header.c b/tools/testing/selftests/x86/corrupt_xstate_header.c
new file mode 100644
index 000000000000..ab8599c10ce5
--- /dev/null
+++ b/tools/testing/selftests/x86/corrupt_xstate_header.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Corrupt the XSTATE header in a signal frame
+ *
+ * Based on analysis and a test case from Thomas Gleixner.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <err.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/wait.h>
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ asm volatile(
+ "cpuid;"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+static inline int xsave_enabled(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ eax = 0x1;
+ ecx = 0x0;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ /* Is CR4.OSXSAVE enabled ? */
+ return ecx & (1U << 27);
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void sigusr1(int sig, siginfo_t *info, void *uc_void)
+{
+ ucontext_t *uc = uc_void;
+ uint8_t *fpstate = (uint8_t *)uc->uc_mcontext.fpregs;
+ uint64_t *xfeatures = (uint64_t *)(fpstate + 512);
+
+ printf("\tWreck XSTATE header\n");
+ /* Wreck the first reserved bytes in the header */
+ *(xfeatures + 2) = 0xfffffff;
+}
+
+static void sigsegv(int sig, siginfo_t *info, void *uc_void)
+{
+ printf("\tGot SIGSEGV\n");
+}
+
+int main(void)
+{
+ cpu_set_t set;
+
+ sethandler(SIGUSR1, sigusr1, 0);
+ sethandler(SIGSEGV, sigsegv, 0);
+
+ if (!xsave_enabled()) {
+ printf("[SKIP] CR4.OSXSAVE disabled.\n");
+ return 0;
+ }
+
+ CPU_ZERO(&set);
+ CPU_SET(0, &set);
+
+ /*
+ * Enforce that the child runs on the same CPU
+ * which in turn forces a schedule.
+ */
+ sched_setaffinity(getpid(), sizeof(set), &set);
+
+ printf("[RUN]\tSend ourselves a signal\n");
+ raise(SIGUSR1);
+
+ printf("[OK]\tBack from the signal. Now schedule.\n");
+ pid_t child = fork();
+ if (child < 0)
+ err(1, "fork");
+ if (child == 0)
+ return 0;
+ if (child)
+ waitpid(child, NULL, 0);
+ printf("[OK]\tBack in the main thread.\n");
+
+ /*
+ * We could try to confirm that extended state is still preserved
+ * when we schedule. For now, the only indication of failure is
+ * a warning in the kernel logs.
+ */
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
index 6da0ac3f0135..cc3de6ff9fba 100644
--- a/tools/testing/selftests/x86/mov_ss_trap.c
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -47,7 +47,6 @@
unsigned short ss;
extern unsigned char breakpoint_insn[];
sigjmp_buf jmpbuf;
-static unsigned char altstack_data[SIGSTKSZ];
static void enable_watchpoint(void)
{
@@ -250,13 +249,14 @@ int main()
if (sigsetjmp(jmpbuf, 1) == 0) {
printf("[RUN]\tMOV SS; SYSENTER\n");
stack_t stack = {
- .ss_sp = altstack_data,
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
err(1, "sigaltstack");
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
nr = SYS_getpid;
+ free(stack.ss_sp);
/* Clear EBP first to make sure we segfault cleanly. */
asm volatile ("xorl %%ebp, %%ebp; mov %[ss], %%ss; SYSENTER" : "+a" (nr)
: [ss] "m" (ss) : "flags", "rcx"
diff --git a/tools/testing/selftests/x86/sigaltstack.c b/tools/testing/selftests/x86/sigaltstack.c
new file mode 100644
index 000000000000..f689af75e979
--- /dev/null
+++ b/tools/testing/selftests/x86/sigaltstack.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define _GNU_SOURCE
+#include <signal.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/mman.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <setjmp.h>
+
+/* sigaltstack()-enforced minimum stack */
+#define ENFORCED_MINSIGSTKSZ 2048
+
+#ifndef AT_MINSIGSTKSZ
+# define AT_MINSIGSTKSZ 51
+#endif
+
+static int nerrs;
+
+static bool sigalrm_expected;
+
+static unsigned long at_minstack_size;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static int setup_altstack(void *start, unsigned long size)
+{
+ stack_t ss;
+
+ memset(&ss, 0, sizeof(ss));
+ ss.ss_size = size;
+ ss.ss_sp = start;
+
+ return sigaltstack(&ss, NULL);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ if (sigalrm_expected) {
+ printf("[FAIL]\tWrong signal delivered: SIGSEGV (expected SIGALRM).");
+ nerrs++;
+ } else {
+ printf("[OK]\tSIGSEGV signal delivered.\n");
+ }
+
+ siglongjmp(jmpbuf, 1);
+}
+
+static void sigalrm(int sig, siginfo_t *info, void *ctx_void)
+{
+ if (!sigalrm_expected) {
+ printf("[FAIL]\tWrong signal delivered: SIGALRM (expected SIGSEGV).");
+ nerrs++;
+ } else {
+ printf("[OK]\tSIGALRM signal delivered.\n");
+ }
+}
+
+static void test_sigaltstack(void *altstack, unsigned long size)
+{
+ if (setup_altstack(altstack, size))
+ err(1, "sigaltstack()");
+
+ sigalrm_expected = (size > at_minstack_size) ? true : false;
+
+ sethandler(SIGSEGV, sigsegv, 0);
+ sethandler(SIGALRM, sigalrm, SA_ONSTACK);
+
+ if (!sigsetjmp(jmpbuf, 1)) {
+ printf("[RUN]\tTest an alternate signal stack of %ssufficient size.\n",
+ sigalrm_expected ? "" : "in");
+ printf("\tRaise SIGALRM. %s is expected to be delivered.\n",
+ sigalrm_expected ? "It" : "SIGSEGV");
+ raise(SIGALRM);
+ }
+
+ clearhandler(SIGALRM);
+ clearhandler(SIGSEGV);
+}
+
+int main(void)
+{
+ void *altstack;
+
+ at_minstack_size = getauxval(AT_MINSIGSTKSZ);
+
+ altstack = mmap(NULL, at_minstack_size + SIGSTKSZ, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (altstack == MAP_FAILED)
+ err(1, "mmap()");
+
+ if ((ENFORCED_MINSIGSTKSZ + 1) < at_minstack_size)
+ test_sigaltstack(altstack, ENFORCED_MINSIGSTKSZ + 1);
+
+ test_sigaltstack(altstack, at_minstack_size + SIGSTKSZ);
+
+ return nerrs == 0 ? 0 : 1;
+}
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 57c4f67f16ef..5d7961a5f7f6 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -138,9 +138,6 @@ static unsigned short LDT3(int idx)
return (idx << 3) | 7;
}
-/* Our sigaltstack scratch space. */
-static char altstack_data[SIGSTKSZ];
-
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -771,7 +768,8 @@ int main()
setup_ldt();
stack_t stack = {
- .ss_sp = altstack_data,
+ /* Our sigaltstack scratch space. */
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
@@ -872,5 +870,6 @@ int main()
total_nerrs += test_nonstrict_ss();
#endif
+ free(stack.ss_sp);
return total_nerrs ? 1 : 0;
}
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 120ac741fe44..9a30f443e928 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -57,7 +57,6 @@ static void clearhandler(int sig)
static volatile sig_atomic_t sig_traps, sig_eflags;
sigjmp_buf jmpbuf;
-static unsigned char altstack_data[SIGSTKSZ];
#ifdef __x86_64__
# define REG_IP REG_RIP
@@ -210,7 +209,7 @@ int main()
unsigned long nr = SYS_getpid;
printf("[RUN]\tSet TF and check SYSENTER\n");
stack_t stack = {
- .ss_sp = altstack_data,
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
@@ -219,6 +218,7 @@ int main()
SA_RESETHAND | SA_ONSTACK);
sethandler(SIGILL, print_and_longjmp, SA_RESETHAND);
set_eflags(get_eflags() | X86_EFLAGS_TF);
+ free(stack.ss_sp);
/* Clear EBP first to make sure we segfault cleanly. */
asm volatile ("xorl %%ebp, %%ebp; SYSENTER" : "+a" (nr) :: "flags", "rcx"
#ifdef __x86_64__
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index bff474b5efc6..461fa41a4d02 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -17,9 +17,6 @@
#include "helpers.h"
-/* Our sigaltstack scratch space. */
-static unsigned char altstack_data[SIGSTKSZ];
-
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -104,7 +101,8 @@ static void sigill(int sig, siginfo_t *info, void *ctx_void)
int main()
{
stack_t stack = {
- .ss_sp = altstack_data,
+ /* Our sigaltstack scratch space. */
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
@@ -233,5 +231,6 @@ int main()
set_eflags(get_eflags() & ~X86_EFLAGS_TF);
#endif
+ free(stack.ss_sp);
return 0;
}
diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
index d6b09cb1aa2c..991591718bb0 100644
--- a/tools/testing/selftests/x86/syscall_numbering.c
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -1,6 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
+ * syscall_numbering.c - test calling the x86-64 kernel with various
+ * valid and invalid system call numbers.
+ *
* Copyright (c) 2018 Andrew Lutomirski
*/
@@ -11,79 +13,470 @@
#include <stdbool.h>
#include <errno.h>
#include <unistd.h>
-#include <syscall.h>
+#include <string.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <sysexits.h>
-static int nerrs;
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
-#define X32_BIT 0x40000000UL
+#include <linux/ptrace.h>
-static void check_enosys(unsigned long nr, bool *ok)
+/* Common system call numbers */
+#define SYS_READ 0
+#define SYS_WRITE 1
+#define SYS_GETPID 39
+/* x64-only system call numbers */
+#define X64_IOCTL 16
+#define X64_READV 19
+#define X64_WRITEV 20
+/* x32-only system call numbers (without X32_BIT) */
+#define X32_IOCTL 514
+#define X32_READV 515
+#define X32_WRITEV 516
+
+#define X32_BIT 0x40000000
+
+static int nullfd = -1; /* File descriptor for /dev/null */
+static bool with_x32; /* x32 supported on this kernel? */
+
+enum ptrace_pass {
+ PTP_NOTHING,
+ PTP_GETREGS,
+ PTP_WRITEBACK,
+ PTP_FUZZRET,
+ PTP_FUZZHIGH,
+ PTP_INTNUM,
+ PTP_DONE
+};
+
+static const char * const ptrace_pass_name[] =
{
- /* If this fails, a segfault is reasonably likely. */
- fflush(stdout);
+ [PTP_NOTHING] = "just stop, no data read",
+ [PTP_GETREGS] = "only getregs",
+ [PTP_WRITEBACK] = "getregs, unmodified setregs",
+ [PTP_FUZZRET] = "modifying the default return",
+ [PTP_FUZZHIGH] = "clobbering the top 32 bits",
+ [PTP_INTNUM] = "sign-extending the syscall number",
+};
- long ret = syscall(nr, 0, 0, 0, 0, 0, 0);
- if (ret == 0) {
- printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr);
- *ok = false;
- } else if (errno != ENOSYS) {
- printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno);
- *ok = false;
- }
+/*
+ * Shared memory block between tracer and test
+ */
+struct shared {
+ unsigned int nerr; /* Total error count */
+ unsigned int indent; /* Message indentation level */
+ enum ptrace_pass ptrace_pass;
+ bool probing_syscall; /* In probe_syscall() */
+};
+static volatile struct shared *sh;
+
+static inline unsigned int offset(void)
+{
+ unsigned int level = sh ? sh->indent : 0;
+
+ return 8 + level * 4;
}
-static void test_x32_without_x32_bit(void)
+#define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \
+ ## __VA_ARGS__)
+
+#define run(fmt, ...) msg(RUN, fmt, ## __VA_ARGS__)
+#define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__)
+#define ok(fmt, ...) msg(OK, fmt, ## __VA_ARGS__)
+
+#define fail(fmt, ...) \
+ do { \
+ msg(FAIL, fmt, ## __VA_ARGS__); \
+ sh->nerr++; \
+ } while (0)
+
+#define crit(fmt, ...) \
+ do { \
+ sh->indent = 0; \
+ msg(FAIL, fmt, ## __VA_ARGS__); \
+ msg(SKIP, "Unable to run test\n"); \
+ exit(EX_OSERR); \
+ } while (0)
+
+/* Sentinel for ptrace-modified return value */
+#define MODIFIED_BY_PTRACE -9999
+
+/*
+ * Directly invokes the given syscall with nullfd as the first argument
+ * and the rest zero. Avoids involving glibc wrappers in case they ever
+ * end up intercepting some system calls for some reason, or modify
+ * the system call number itself.
+ */
+static long long probe_syscall(int msb, int lsb)
{
- bool ok = true;
+ register long long arg1 asm("rdi") = nullfd;
+ register long long arg2 asm("rsi") = 0;
+ register long long arg3 asm("rdx") = 0;
+ register long long arg4 asm("r10") = 0;
+ register long long arg5 asm("r8") = 0;
+ register long long arg6 asm("r9") = 0;
+ long long nr = ((long long)msb << 32) | (unsigned int)lsb;
+ long long ret;
/*
- * Syscalls 512-547 are "x32" syscalls. They are intended to be
- * called with the x32 (0x40000000) bit set. Calling them without
- * the x32 bit set is nonsense and should not work.
+ * We pass in an extra copy of the extended system call number
+ * in %rbx, so we can examine it from the ptrace handler without
+ * worrying about it being possibly modified. This is to test
+ * the validity of struct user regs.orig_rax a.k.a.
+ * struct pt_regs.orig_ax.
*/
- printf("[RUN]\tChecking syscalls 512-547\n");
- for (int i = 512; i <= 547; i++)
- check_enosys(i, &ok);
+ sh->probing_syscall = true;
+ asm volatile("syscall"
+ : "=a" (ret)
+ : "a" (nr), "b" (nr),
+ "r" (arg1), "r" (arg2), "r" (arg3),
+ "r" (arg4), "r" (arg5), "r" (arg6)
+ : "rcx", "r11", "memory", "cc");
+ sh->probing_syscall = false;
+
+ return ret;
+}
+
+static const char *syscall_str(int msb, int start, int end)
+{
+ static char buf[64];
+ const char * const type = (start & X32_BIT) ? "x32" : "x64";
+ int lsb = start;
/*
- * Check that a handful of 64-bit-only syscalls are rejected if the x32
- * bit is set.
+ * Improve readability by stripping the x32 bit, but round
+ * toward zero so we don't display -1 as -1073741825.
*/
- printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n");
- check_enosys(16 | X32_BIT, &ok); /* ioctl */
- check_enosys(19 | X32_BIT, &ok); /* readv */
- check_enosys(20 | X32_BIT, &ok); /* writev */
+ if (lsb < 0)
+ lsb |= X32_BIT;
+ else
+ lsb &= ~X32_BIT;
+
+ if (start == end)
+ snprintf(buf, sizeof buf, "%s syscall %d:%d",
+ type, msb, lsb);
+ else
+ snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d",
+ type, msb, lsb, lsb + (end-start));
+
+ return buf;
+}
+
+static unsigned int _check_for(int msb, int start, int end, long long expect,
+ const char *expect_str)
+{
+ unsigned int err = 0;
+
+ sh->indent++;
+ if (start != end)
+ sh->indent++;
+
+ for (int nr = start; nr <= end; nr++) {
+ long long ret = probe_syscall(msb, nr);
+
+ if (ret != expect) {
+ fail("%s returned %lld, but it should have returned %s\n",
+ syscall_str(msb, nr, nr),
+ ret, expect_str);
+ err++;
+ }
+ }
+
+ if (start != end)
+ sh->indent--;
+
+ if (err) {
+ if (start != end)
+ fail("%s had %u failure%s\n",
+ syscall_str(msb, start, end),
+ err, err == 1 ? "s" : "");
+ } else {
+ ok("%s returned %s as expected\n",
+ syscall_str(msb, start, end), expect_str);
+ }
+
+ sh->indent--;
+
+ return err;
+}
+
+#define check_for(msb,start,end,expect) \
+ _check_for(msb,start,end,expect,#expect)
+
+static bool check_zero(int msb, int nr)
+{
+ return check_for(msb, nr, nr, 0);
+}
+
+static bool check_enosys(int msb, int nr)
+{
+ return check_for(msb, nr, nr, -ENOSYS);
+}
+
+/*
+ * Anyone diagnosing a failure will want to know whether the kernel
+ * supports x32. Tell them. This can also be used to conditionalize
+ * tests based on existence or nonexistence of x32.
+ */
+static bool test_x32(void)
+{
+ long long ret;
+ pid_t mypid = getpid();
+
+ run("Checking for x32 by calling x32 getpid()\n");
+ ret = probe_syscall(0, SYS_GETPID | X32_BIT);
+
+ sh->indent++;
+ if (ret == mypid) {
+ info("x32 is supported\n");
+ with_x32 = true;
+ } else if (ret == -ENOSYS) {
+ info("x32 is not supported\n");
+ with_x32 = false;
+ } else {
+ fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid);
+ with_x32 = false;
+ }
+ sh->indent--;
+ return with_x32;
+}
+
+static void test_syscalls_common(int msb)
+{
+ enum ptrace_pass pass = sh->ptrace_pass;
+
+ run("Checking some common syscalls as 64 bit\n");
+ check_zero(msb, SYS_READ);
+ check_zero(msb, SYS_WRITE);
+
+ run("Checking some 64-bit only syscalls as 64 bit\n");
+ check_zero(msb, X64_READV);
+ check_zero(msb, X64_WRITEV);
+
+ run("Checking out of range system calls\n");
+ check_for(msb, -64, -2, -ENOSYS);
+ if (pass >= PTP_FUZZRET)
+ check_for(msb, -1, -1, MODIFIED_BY_PTRACE);
+ else
+ check_for(msb, -1, -1, -ENOSYS);
+ check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
+ check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
+ check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
+}
+static void test_syscalls_with_x32(int msb)
+{
/*
- * Check some syscalls with high bits set.
+ * Syscalls 512-547 are "x32" syscalls. They are
+ * intended to be called with the x32 (0x40000000) bit
+ * set. Calling them without the x32 bit set is
+ * nonsense and should not work.
*/
- printf("[RUN]\tChecking numbers above 2^32-1\n");
- check_enosys((1UL << 32), &ok);
- check_enosys(X32_BIT | (1UL << 32), &ok);
+ run("Checking x32 syscalls as 64 bit\n");
+ check_for(msb, 512, 547, -ENOSYS);
- if (!ok)
- nerrs++;
- else
- printf("[OK]\tThey all returned -ENOSYS\n");
+ run("Checking some common syscalls as x32\n");
+ check_zero(msb, SYS_READ | X32_BIT);
+ check_zero(msb, SYS_WRITE | X32_BIT);
+
+ run("Checking some x32 syscalls as x32\n");
+ check_zero(msb, X32_READV | X32_BIT);
+ check_zero(msb, X32_WRITEV | X32_BIT);
+
+ run("Checking some 64-bit syscalls as x32\n");
+ check_enosys(msb, X64_IOCTL | X32_BIT);
+ check_enosys(msb, X64_READV | X32_BIT);
+ check_enosys(msb, X64_WRITEV | X32_BIT);
}
-int main()
+static void test_syscalls_without_x32(int msb)
{
+ run("Checking for absence of x32 system calls\n");
+ check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS);
+}
+
+static void test_syscall_numbering(void)
+{
+ static const int msbs[] = {
+ 0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
+ INT_MIN, INT_MIN+1
+ };
+
+ sh->indent++;
+
/*
- * Anyone diagnosing a failure will want to know whether the kernel
- * supports x32. Tell them.
+ * The MSB is supposed to be ignored, so we loop over a few
+ * to test that out.
*/
- printf("\tChecking for x32...");
- fflush(stdout);
- if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) {
- printf(" supported\n");
- } else if (errno == ENOSYS) {
- printf(" not supported\n");
+ for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
+ int msb = msbs[i];
+ run("Checking system calls with msb = %d (0x%x)\n",
+ msb, msb);
+
+ sh->indent++;
+
+ test_syscalls_common(msb);
+ if (with_x32)
+ test_syscalls_with_x32(msb);
+ else
+ test_syscalls_without_x32(msb);
+
+ sh->indent--;
+ }
+
+ sh->indent--;
+}
+
+static void syscall_numbering_tracee(void)
+{
+ enum ptrace_pass pass;
+
+ if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
+ crit("Failed to request tracing\n");
+ return;
+ }
+ raise(SIGSTOP);
+
+ for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE;
+ sh->ptrace_pass = ++pass) {
+ run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]);
+ test_syscall_numbering();
+ }
+}
+
+static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass)
+{
+ struct user_regs_struct regs;
+
+ sh->probing_syscall = false; /* Do this on entry only */
+
+ /* For these, don't even getregs */
+ if (pass == PTP_NOTHING || pass == PTP_DONE)
+ return;
+
+ ptrace(PTRACE_GETREGS, testpid, NULL, &regs);
+
+ if (regs.orig_rax != regs.rbx) {
+ fail("orig_rax %#llx doesn't match syscall number %#llx\n",
+ (unsigned long long)regs.orig_rax,
+ (unsigned long long)regs.rbx);
+ }
+
+ switch (pass) {
+ case PTP_GETREGS:
+ /* Just read, no writeback */
+ return;
+ case PTP_WRITEBACK:
+ /* Write back the same register state verbatim */
+ break;
+ case PTP_FUZZRET:
+ regs.rax = MODIFIED_BY_PTRACE;
+ break;
+ case PTP_FUZZHIGH:
+ regs.rax = MODIFIED_BY_PTRACE;
+ regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL;
+ break;
+ case PTP_INTNUM:
+ regs.rax = MODIFIED_BY_PTRACE;
+ regs.orig_rax = (int)regs.orig_rax;
+ break;
+ default:
+ crit("invalid ptrace_pass\n");
+ break;
+ }
+
+ ptrace(PTRACE_SETREGS, testpid, NULL, &regs);
+}
+
+static void syscall_numbering_tracer(pid_t testpid)
+{
+ int wstatus;
+
+ do {
+ pid_t wpid = waitpid(testpid, &wstatus, 0);
+ if (wpid < 0 && errno != EINTR)
+ break;
+ if (wpid != testpid)
+ continue;
+ if (!WIFSTOPPED(wstatus))
+ break; /* Thread exited? */
+
+ if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP)
+ mess_with_syscall(testpid, sh->ptrace_pass);
+ } while (sh->ptrace_pass != PTP_DONE &&
+ !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL));
+
+ ptrace(PTRACE_DETACH, testpid, NULL, NULL);
+
+ /* Wait for the child process to terminate */
+ while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus))
+ /* wait some more */;
+}
+
+static void test_traced_syscall_numbering(void)
+{
+ pid_t testpid;
+
+ /* Launch the test thread; this thread continues as the tracer thread */
+ testpid = fork();
+
+ if (testpid < 0) {
+ crit("Unable to launch tracer process\n");
+ } else if (testpid == 0) {
+ syscall_numbering_tracee();
+ _exit(0);
} else {
- printf(" confused\n");
+ syscall_numbering_tracer(testpid);
}
+}
- test_x32_without_x32_bit();
+int main(void)
+{
+ unsigned int nerr;
- return nerrs ? 1 : 0;
+ /*
+ * It is quite likely to get a segfault on a failure, so make
+ * sure the message gets out by setting stdout to nonbuffered.
+ */
+ setvbuf(stdout, NULL, _IONBF, 0);
+
+ /*
+ * Harmless file descriptor to work on...
+ */
+ nullfd = open("/dev/null", O_RDWR);
+ if (nullfd < 0) {
+ crit("Unable to open /dev/null: %s\n", strerror(errno));
+ }
+
+ /*
+ * Set up a block of shared memory...
+ */
+ sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_SHARED, 0, 0);
+ if (sh == MAP_FAILED) {
+ crit("Unable to allocated shared memory block: %s\n",
+ strerror(errno));
+ }
+
+ with_x32 = test_x32();
+
+ run("Running tests without ptrace...\n");
+ test_syscall_numbering();
+
+ test_traced_syscall_numbering();
+
+ nerr = sh->nerr;
+ if (!nerr) {
+ ok("All system calls succeeded or failed as expected\n");
+ return 0;
+ } else {
+ fail("A total of %u system call%s had incorrect behavior\n",
+ nerr, nerr != 1 ? "s" : "");
+ return 1;
+ }
}